wip: further progress on removing segments

parent 71cfa45e76
commit 441d4fed9f

21 changed files with 2396 additions and 2492 deletions
@@ -82,7 +82,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_
 #if (MI_STAT>0)
   const size_t bsize = mi_page_usable_block_size(page);
-  if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
+  if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
     mi_heap_stat_increase(heap, normal, bsize);
     mi_heap_stat_counter_increase(heap, normal_count, 1);
 #if (MI_STAT>1)
871   src/arena.c        (file diff suppressed because it is too large)

419   src/bitmap-old.c   (new file)

@@ -0,0 +1,419 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represented as an array of fields where each field is a machine word (`size_t`)

There are two api's; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).

The `_across` postfixed functions do allow sequences that can cross over
between the fields. (This is used in arena allocation)
---------------------------------------------------------------------------- */

#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/bits.h"
#include "bitmap.h"

/* -----------------------------------------------------------
  Bitmap definition
----------------------------------------------------------- */

// The bit mask for a given number of blocks at a specified bit index.
static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) {
  mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
  mi_assert_internal(count > 0);
  if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
  if (count == 0) return 0;
  return ((((size_t)1 << count) - 1) << bitidx);
}


/* -----------------------------------------------------------
  Claim a bit sequence atomically
----------------------------------------------------------- */

// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
  mi_assert_internal(bitmap_idx != NULL);
  mi_assert_internal(count <= MI_BITMAP_FIELD_BITS);
  mi_bitmap_field_t* field = &bitmap[idx];
  size_t map = mi_atomic_load_relaxed(field);
  if (map==MI_BITMAP_FIELD_FULL) return false; // short cut

  // search for 0-bit sequence of length count
  const size_t mask = mi_bitmap_mask_(count, 0);
  const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;

#if MI_HAS_FAST_BITSCAN
  size_t bitidx = mi_ctz(~map);    // quickly find the first zero bit if possible
#else
  size_t bitidx = 0;               // otherwise start at 0
#endif
  size_t m = (mask << bitidx);     // invariant: m == mask shifted by bitidx

  // scan linearly for a free range of zero bits
  while (bitidx <= bitidx_max) {
    const size_t mapm = (map & m);
    if (mapm == 0) {  // are the mask bits free at bitidx?
      mi_assert_internal((m >> bitidx) == mask); // no overflow?
      const size_t newmap = (map | m);
      mi_assert_internal((newmap^map) >> bitidx == mask);
      if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) {  // TODO: use weak cas here?
        // no success, another thread claimed concurrently.. keep going (with updated `map`)
        continue;
      }
      else {
        // success, we claimed the bits!
        *bitmap_idx = mi_bitmap_index_create(idx, bitidx);
        return true;
      }
    }
    else {
      // on to the next bit range
#if MI_HAS_FAST_BITSCAN
      mi_assert_internal(mapm != 0);
      const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
      mi_assert_internal(shift > 0 && shift <= count);
#else
      const size_t shift = 1;
#endif
      bitidx += shift;
      m <<= shift;
    }
  }
  // no bits found
  return false;
}


// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
  size_t idx = start_field_idx;
  for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
    if (idx >= bitmap_fields) { idx = 0; } // wrap
    if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
      return true;
    }
  }
  return false;
}


// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  const size_t idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  const size_t mask = mi_bitmap_mask_(count, bitidx);
  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
  // mi_assert_internal((bitmap[idx] & mask) == mask);
  const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
  return ((prev & mask) == mask);
}


// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
  const size_t idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  const size_t mask = mi_bitmap_mask_(count, bitidx);
  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
  //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
  size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
  if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); }
  return ((prev & mask) == 0);
}

// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one.
static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) {
  const size_t idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  const size_t mask = mi_bitmap_mask_(count, bitidx);
  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
  const size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
  if (any_ones != NULL) { *any_ones = ((field & mask) != 0); }
  return ((field & mask) == mask);
}

// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
// Returns `true` if successful when all previous `count` bits were 0.
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  const size_t idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  const size_t mask = mi_bitmap_mask_(count, bitidx);
  mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
  size_t expected = mi_atomic_load_relaxed(&bitmap[idx]);
  do {
    if ((expected & mask) != 0) return false;
  }
  while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask));
  mi_assert_internal((expected & mask) == 0);
  return true;
}


bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL);
}

bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  bool any_ones;
  mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
  return any_ones;
}


//--------------------------------------------------------------------------
// the `_across` functions work on bitmaps where sequences can cross over
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------

// Try to atomically claim a sequence of `count` bits starting from the field
// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
{
  mi_assert_internal(bitmap_idx != NULL);

  // check initial trailing zeros
  mi_bitmap_field_t* field = &bitmap[idx];
  size_t map = mi_atomic_load_relaxed(field);
  const size_t initial = mi_clz(map);  // count of initial zeros starting at idx
  mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
  if (initial == 0) return false;
  if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx);    // no need to cross fields (this case won't happen for us)
  if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries

  // scan ahead
  size_t found = initial;
  size_t mask = 0;     // mask bits for the final field
  while(found < count) {
    field++;
    map = mi_atomic_load_relaxed(field);
    const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
    mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS);
    mask = mi_bitmap_mask_(mask_bits, 0);
    if ((map & mask) != 0) return false; // some part is already claimed
    found += mask_bits;
  }
  mi_assert_internal(field < &bitmap[bitmap_fields]);

  // we found a range of contiguous zeros up to the final field; mask contains mask in the final field
  // now try to claim the range atomically
  mi_bitmap_field_t* const final_field = field;
  const size_t final_mask = mask;
  mi_bitmap_field_t* const initial_field = &bitmap[idx];
  const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial;
  const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx);

  // initial field
  size_t newmap;
  field = initial_field;
  map = mi_atomic_load_relaxed(field);
  do {
    newmap = (map | initial_mask);
    if ((map & initial_mask) != 0) { goto rollback; };
  } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));

  // intermediate fields
  while (++field < final_field) {
    newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
    map = 0;
    if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; }
  }

  // final field
  mi_assert_internal(field == final_field);
  map = mi_atomic_load_relaxed(field);
  do {
    newmap = (map | final_mask);
    if ((map & final_mask) != 0) { goto rollback; }
  } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));

  // claimed!
  mi_stat_counter_increase(stats->arena_crossover_count,1);
  *bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
  return true;

rollback:
  // roll back intermediate fields
  // (we just failed to claim `field` so decrement first)
  while (--field > initial_field) {
    newmap = 0;
    map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
    mi_assert_internal(mi_atomic_load_relaxed(field) == map);
    mi_atomic_store_release(field, newmap);
  }
  if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`)
    map = mi_atomic_load_relaxed(field);
    do {
      mi_assert_internal((map & initial_mask) == initial_mask);
      newmap = (map & ~initial_mask);
    } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
  }
  mi_stat_counter_increase(stats->arena_rollback_count,1);
  // retry? (we make a recursive call instead of goto to be able to use const declarations)
  if (retries <= 2) {
    return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx, stats);
  }
  else {
    return false;
  }
}


// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) {
  mi_assert_internal(count > 0);
  if (count <= 2) {
    // we don't bother with crossover fields for small counts
    return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
  }

  // visit the fields
  size_t idx = start_field_idx;
  for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
    if (idx >= bitmap_fields) { idx = 0; } // wrap
    // first try to claim inside a field
    /*
    if (count <= MI_BITMAP_FIELD_BITS) {
      if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
        return true;
      }
    }
    */
    // if that fails, then try to claim across fields
    if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx, stats)) {
      return true;
    }
  }
  return false;
}

// Helper for masks across fields; returns the mid count, post_mask may be 0
static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) {
  MI_UNUSED(bitmap_fields);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) {
    *pre_mask = mi_bitmap_mask_(count, bitidx);
    *mid_mask = 0;
    *post_mask = 0;
    mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields);
    return 0;
  }
  else {
    const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx;
    mi_assert_internal(pre_bits < count);
    *pre_mask = mi_bitmap_mask_(pre_bits, bitidx);
    count -= pre_bits;
    const size_t mid_count = (count / MI_BITMAP_FIELD_BITS);
    *mid_mask = MI_BITMAP_FIELD_FULL;
    count %= MI_BITMAP_FIELD_BITS;
    *post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0));
    mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields);
    return mid_count;
  }
}

// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  size_t idx = mi_bitmap_index_field(bitmap_idx);
  size_t pre_mask;
  size_t mid_mask;
  size_t post_mask;
  size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
  bool all_one = true;
  mi_bitmap_field_t* field = &bitmap[idx];
  size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part
  if ((prev & pre_mask) != pre_mask) all_one = false;
  while(mid_count-- > 0) {
    prev = mi_atomic_and_acq_rel(field++, ~mid_mask);      // clear mid part
    if ((prev & mid_mask) != mid_mask) all_one = false;
  }
  if (post_mask!=0) {
    prev = mi_atomic_and_acq_rel(field, ~post_mask);       // clear end part
    if ((prev & post_mask) != post_mask) all_one = false;
  }
  return all_one;
}

// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
  size_t idx = mi_bitmap_index_field(bitmap_idx);
  size_t pre_mask;
  size_t mid_mask;
  size_t post_mask;
  size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
  bool all_zero = true;
  bool any_zero = false;
  _Atomic(size_t)*field = &bitmap[idx];
  size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
  if ((prev & pre_mask) != 0) all_zero = false;
  if ((prev & pre_mask) != pre_mask) any_zero = true;
  while (mid_count-- > 0) {
    prev = mi_atomic_or_acq_rel(field++, mid_mask);
    if ((prev & mid_mask) != 0) all_zero = false;
    if ((prev & mid_mask) != mid_mask) any_zero = true;
  }
  if (post_mask!=0) {
    prev = mi_atomic_or_acq_rel(field, post_mask);
    if ((prev & post_mask) != 0) all_zero = false;
    if ((prev & post_mask) != post_mask) any_zero = true;
  }
  if (pany_zero != NULL) { *pany_zero = any_zero; }
  return all_zero;
}


// Returns `true` if all `count` bits were 1.
// `any_ones` is `true` if there was at least one bit set to one.
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
  size_t idx = mi_bitmap_index_field(bitmap_idx);
  size_t pre_mask;
  size_t mid_mask;
  size_t post_mask;
  size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
  bool all_ones = true;
  bool any_ones = false;
  mi_bitmap_field_t* field = &bitmap[idx];
  size_t prev = mi_atomic_load_relaxed(field++);
  if ((prev & pre_mask) != pre_mask) all_ones = false;
  if ((prev & pre_mask) != 0) any_ones = true;
  while (mid_count-- > 0) {
    prev = mi_atomic_load_relaxed(field++);
    if ((prev & mid_mask) != mid_mask) all_ones = false;
    if ((prev & mid_mask) != 0) any_ones = true;
  }
  if (post_mask!=0) {
    prev = mi_atomic_load_relaxed(field);
    if ((prev & post_mask) != post_mask) all_ones = false;
    if ((prev & post_mask) != 0) any_ones = true;
  }
  if (pany_ones != NULL) { *pany_ones = any_ones; }
  return all_ones;
}

bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL);
}

bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  bool any_ones;
  mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
  return any_ones;
}
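For orientation, a minimal usage sketch of the field-based API that this old file preserves. The `claim_blocks` helper, the `field_count` parameter, and the use of `_mi_stats_main` are illustrative assumptions and not part of this commit; only the called functions come from the listing above.

// Illustrative sketch only -- not taken from the diff.
// Claim `n` contiguous bits in a bitmap of `field_count` fields, to be released again later.
static bool claim_blocks(mi_bitmap_t bitmap, size_t field_count, size_t n, mi_bitmap_index_t* bitmap_idx) {
  if (n <= MI_BITMAP_FIELD_BITS) {
    // the standard API never lets a sequence cross a field boundary
    return _mi_bitmap_try_find_from_claim(bitmap, field_count, 0 /*start field*/, n, bitmap_idx);
  }
  // longer sequences must use the `_across` variant, which may span several fields
  return _mi_bitmap_try_find_from_claim_across(bitmap, field_count, 0, n, bitmap_idx, &_mi_stats_main);
}
// A claimed range is released with the matching call:
//   _mi_bitmap_unclaim(bitmap, field_count, n, idx);          // single-field ranges
//   _mi_bitmap_unclaim_across(bitmap, field_count, n, idx);   // ranges that crossed fields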
110   src/bitmap-old.h   (new file)

@@ -0,0 +1,110 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represented as an array of fields where each field is a machine word (`size_t`)

There are two api's; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
(this is used in region allocation)

The `_across` postfixed functions do allow sequences that can cross over
between the fields. (This is used in arena allocation)
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITMAP_H
#define MI_BITMAP_H

/* -----------------------------------------------------------
  Bitmap definition
----------------------------------------------------------- */

#define MI_BITMAP_FIELD_BITS   (8*MI_SIZE_SIZE)
#define MI_BITMAP_FIELD_FULL   (~((size_t)0))   // all bits set

// An atomic bitmap of `size_t` fields
typedef _Atomic(size_t)  mi_bitmap_field_t;
typedef mi_bitmap_field_t*  mi_bitmap_t;

// A bitmap index is the index of the bit in a bitmap.
typedef size_t mi_bitmap_index_t;

// Create a bit index.
static inline mi_bitmap_index_t mi_bitmap_index_create_ex(size_t idx, size_t bitidx) {
  mi_assert_internal(bitidx <= MI_BITMAP_FIELD_BITS);
  return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
}
static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
  mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
  return mi_bitmap_index_create_ex(idx,bitidx);
}

// Get the field index from a bit index.
static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
  return (bitmap_idx / MI_BITMAP_FIELD_BITS);
}

// Get the bit index in a bitmap field
static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
  return (bitmap_idx % MI_BITMAP_FIELD_BITS);
}

// Get the full bit index
static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
  return bitmap_idx;
}

/* -----------------------------------------------------------
  Claim a bit sequence atomically
----------------------------------------------------------- */

// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx);

// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);

// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);

// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
// Returns `true` if successful when all previous `count` bits were 0.
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);

// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero);

bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);


//--------------------------------------------------------------------------
// the `_across` functions work on bitmaps where sequences can cross over
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------

// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats);

// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);

// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);

bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);

#endif
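The index helpers above simply pack a field number and a bit offset into one integer. A small worked example, assuming a 64-bit `size_t` (so `MI_BITMAP_FIELD_BITS == 64`); the concrete numbers are illustrative only, not taken from the diff:

// Sketch: how a bitmap index round-trips through the helpers.
mi_bitmap_index_t bi = mi_bitmap_index_create(3, 17);  // field 3, bit 17  ->  3*64 + 17 == 209
size_t field = mi_bitmap_index_field(bi);              // 209 / 64 == 3
size_t bit   = mi_bitmap_index_bit_in_field(bi);       // 209 % 64 == 17
size_t abs   = mi_bitmap_index_bit(bi);                // 209, the absolute bit position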
940   src/bitmap.c

@@ -1,19 +1,12 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
+Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
 -----------------------------------------------------------------------------*/
 
 /* ----------------------------------------------------------------------------
-Concurrent bitmap that can set/reset sequences of bits atomically,
-represented as an array of fields where each field is a machine word (`size_t`)
-
-There are two api's; the standard one cannot have sequences that cross
-between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
-
-The `_across` postfixed functions do allow sequences that can cross over
-between the fields. (This is used in arena allocation)
+Concurrent bitmap that can set/reset sequences of bits atomically
 ---------------------------------------------------------------------------- */
 
 #include "mimalloc.h"
@@ -21,399 +14,586 @@ between the fields. (This is used in arena allocation)
#include "mimalloc/bits.h"
#include "bitmap.h"

/* -----------------------------------------------------------
  Bitmap definition
----------------------------------------------------------- */
/* --------------------------------------------------------------------------------
  bfields
-------------------------------------------------------------------------------- */

// The bit mask for a given number of blocks at a specified bit index.
static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) {
  mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
  mi_assert_internal(count > 0);
  if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
  if (count == 0) return 0;
  return ((((size_t)1 << count) - 1) << bitidx);
static inline size_t mi_bfield_ctz(mi_bfield_t x) {
  return mi_ctz(x);
}

static inline size_t mi_bfield_clz(mi_bfield_t x) {
  return mi_clz(x);
}

// find the least significant bit that is set (i.e. count trailing zero's)
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) {
  return mi_bsf(x,idx);
}

static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) {
  return mi_rotr(x,r);
}

// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0).
static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
  mi_assert_internal(idx < MI_BFIELD_BITS);
  const mi_bfield_t mask = ((mi_bfield_t)1)<<idx;
  if (set) {
    const mi_bfield_t old = mi_atomic(fetch_or_explicit)(b, mask, mi_memory_order(acq_rel));
    return ((old&mask) == 0);
  }
  else {
    mi_bfield_t old = mi_atomic(fetch_and_explicit)(b, ~mask, mi_memory_order(acq_rel));
    return ((old&mask) == mask);
  }
}

// Set/clear a mask set of bits atomically, and return true of the mask bits transitioned from all 0's to 1's (or all 1's to 0's)
// `already_xset` is true if all bits for the mask were already set/cleared.
static bool mi_bfield_atomic_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_xset) {
  mi_assert_internal(mask != 0);
  if (set) {
    mi_bfield_t old = *b;
    while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask));  // try to atomically set the mask bits until success
    *already_xset = ((old&mask) == mask);
    return ((old&mask) == 0);
  }
  else { // clear
    mi_bfield_t old = *b;
    while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits until success
    *already_xset = ((old&mask) == 0);
    return ((old&mask) == mask);
  }
}

// Tries to set/clear a bit atomically, and returns true if the bit atomically transitioned from 0 to 1 (or 1 to 0)
static bool mi_bfield_atomic_try_xset( mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
  mi_assert_internal(idx < MI_BFIELD_BITS);
  // for a single bit, we can always just set/clear and test afterwards if it was actually us that changed it first
  return mi_bfield_atomic_xset(set, b, idx);
}


// Tries to (un)set a mask atomically, and returns true if the mask bits atomically transitioned from 0 to mask (or mask to 0)
// and false otherwise (leaving the bit field as is).
static bool mi_bfield_atomic_try_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask ) {
  mi_assert_internal(mask != 0);
  if (set) {
    mi_bfield_t old = *b;
    do {
      if ((old&mask) != 0) return false; // the mask bits are no longer 0
    } while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask));  // try to atomically set the mask bits
    return true;
  }
  else { // clear
    mi_bfield_t old = *b;
    do {
      if ((old&mask) != mask) return false; // the mask bits are no longer set
    } while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits
    return true;
  }
}

/* -----------------------------------------------------------
  Claim a bit sequence atomically
----------------------------------------------------------- */
// Check if all bits corresponding to a mask are set/cleared.
static bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
  mi_assert_internal(mask != 0);
  if (set) {
    return ((*b & mask) == mask);
  }
  else {
    return ((*b & mask) == 0);
  }
}

// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
  mi_assert_internal(bitmap_idx != NULL);
  mi_assert_internal(count <= MI_BITMAP_FIELD_BITS);
  mi_bitmap_field_t* field = &bitmap[idx];
  size_t map = mi_atomic_load_relaxed(field);
  if (map==MI_BITMAP_FIELD_FULL) return false; // short cut
// Tries to set/clear a byte atomically, and returns true if the byte atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise (leaving the bit field as is).
static bool mi_bfield_atomic_try_xset8(mi_bit_t set, _Atomic(mi_bfield_t)* b, size_t byte_idx ) {
  mi_assert_internal(byte_idx < MI_BFIELD_SIZE);
  const mi_bfield_t mask = ((mi_bfield_t)0xFF)<<(byte_idx*8);
  return mi_bfield_atomic_try_xset_mask(set,b,mask);
}

  // search for 0-bit sequence of length count
  const size_t mask = mi_bitmap_mask_(count, 0);
  const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;

#if MI_HAS_FAST_BITSCAN
  size_t bitidx = mi_ctz(~map);    // quickly find the first zero bit if possible
#else
  size_t bitidx = 0;               // otherwise start at 0
#endif
  size_t m = (mask << bitidx);     // invariant: m == mask shifted by bitidx
/* --------------------------------------------------------------------------------
  bitmap chunks
-------------------------------------------------------------------------------- */

  // scan linearly for a free range of zero bits
  while (bitidx <= bitidx_max) {
    const size_t mapm = (map & m);
    if (mapm == 0) {  // are the mask bits free at bitidx?
      mi_assert_internal((m >> bitidx) == mask); // no overflow?
      const size_t newmap = (map | m);
      mi_assert_internal((newmap^map) >> bitidx == mask);
      if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) {  // TODO: use weak cas here?
        // no success, another thread claimed concurrently.. keep going (with updated `map`)
        continue;
static bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx ) {
  mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
  const size_t i = cidx / MI_BFIELD_BITS;
  const size_t idx = cidx % MI_BFIELD_BITS;
  return mi_bfield_atomic_try_xset( set, &chunk->bfields[i], idx);
}

static bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx ) {
  mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS);
  const size_t i = byte_idx / MI_BFIELD_SIZE;
  const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE;
  return mi_bfield_atomic_try_xset8( set, &chunk->bfields[i], ibyte_idx);
}

// Set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0)
static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* palready_xset) {
  mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
  mi_assert_internal(n>0);
  bool all_transition = true;
  bool all_already_xset = true;
  size_t idx   = cidx % MI_BFIELD_BITS;
  size_t field = cidx / MI_BFIELD_BITS;
  while (n > 0) {
    size_t m = MI_BFIELD_BITS - idx;   // m is the bits to xset in this field
    if (m > n) { m = n; }
    mi_assert_internal(idx + m <= MI_BFIELD_BITS);
    mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
    const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
    bool already_xset;
    all_transition = all_transition && mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
    all_already_xset = all_already_xset && already_xset;
    // next field
    field++;
    idx = 0;
    n -= m;
  }
  *palready_xset = all_already_xset;
  return all_transition;
}

// Check if a sequence of `n` bits within a chunk are all set/cleared.
static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
  mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
  mi_assert_internal(n>0);
  bool all_xset = true;
  size_t idx   = cidx % MI_BFIELD_BITS;
  size_t field = cidx / MI_BFIELD_BITS;
  while (n > 0) {
    size_t m = MI_BFIELD_BITS - idx;   // m is the bits to xset in this field
    if (m > n) { m = n; }
    mi_assert_internal(idx + m <= MI_BFIELD_BITS);
    mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
    const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
    all_xset = all_xset && mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[field], mask);
    // next field
    field++;
    idx = 0;
    n -= m;
  }
  return all_xset;
}

// Try to atomically set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0),
// and false otherwise leaving all bit fields as is.
static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
  mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
  mi_assert_internal(n>0);
  if (n==0) return true;
  size_t start_idx   = cidx % MI_BFIELD_BITS;
  size_t start_field = cidx / MI_BFIELD_BITS;
  size_t end_field   = MI_BITMAP_CHUNK_FIELDS;
  size_t mask_mid    = 0;
  size_t mask_end    = 0;

  // first field
  size_t field = start_field;
  size_t m = MI_BFIELD_BITS - start_idx;   // m is the bits to xset in this field
  if (m > n) { m = n; }
  mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
  mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS);
  const size_t mask_start = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << start_idx);
  if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start)) return false;

  // done?
  n -= m;
  if (n==0) return true;

  // continue with mid fields and last field: if these fail we need to recover by unsetting previous fields

  // mid fields
  while (n >= MI_BFIELD_BITS) {
    field++;
    mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
    mask_mid = ~MI_ZU(0);
    if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore;
    n -= MI_BFIELD_BITS;
  }

  // last field
  if (n > 0) {
    mi_assert_internal(n < MI_BFIELD_BITS);
    field++;
    mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
    end_field = field;
    mask_end = (MI_ZU(1)<<n)-1;
    if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_end)) goto restore;
  }

  return true;

restore:
  // field is on the field that failed to set atomically; we need to restore all previous fields
  mi_assert_internal(field > start_field);
  while( field > start_field) {
    field--;
    const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid));
    bool already_xset;
    mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, &already_xset);
  }
  return false;
}


// find least 1-bit in a chunk and try unset it atomically
// set `*pidx` to thi bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
// todo: try neon version
static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
  while(true) {
    const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
    if (_mm256_testz_si256(vec,vec)) return false; // vec == 0 ?
    const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // (elem64 == 0 ? -1 : 0)
    const uint32_t mask = ~_mm256_movemask_epi8(vcmp);  // mask of most significant bit of each byte (so each 8 bits in the mask will be all 1 or all 0)
    mi_assert_internal(mask != 0);
    const size_t chunk_idx = _tzcnt_u32(mask) / 8; // tzcnt == 0, 8, 16, or 24
    mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
    size_t cidx;
    if (mi_bfield_find_least_bit(chunk->bfields[chunk_idx],&cidx)) {           // find the bit that is set
      if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[chunk_idx], cidx)) {  // unset atomically
        *pidx = (chunk_idx*MI_BFIELD_BITS) + cidx;
        mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
        return true;
      }
    }
    // try again
  }
#else
  size_t idx;
  for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    size_t idx;
    if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i],&idx)) { // find least 1-bit
      if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[i],idx)) {  // try unset atomically
        *pidx = (i*MI_BFIELD_BITS + idx);
        mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
        return true;
      }
    }
  }
  return false;
#endif
}


// find least byte in a chunk with all bits set, and try unset it atomically
// set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
// todo: try neon version
static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, size_t* pidx) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
  while(true) {
    const __m256i vec  = _mm256_load_si256((const __m256i*)chunk->bfields);
    const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0)
    const uint32_t mask = _mm256_movemask_epi8(vcmp);    // mask of most significant bit of each byte
    if (mask == 0) return false;
    const size_t i = _tzcnt_u32(mask);
    mi_assert_internal(8*i < MI_BITMAP_CHUNK_BITS);
    const size_t chunk_idx = i / MI_BFIELD_SIZE;
    const size_t byte_idx  = i % MI_BFIELD_SIZE;
    if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[chunk_idx],byte_idx)) {  // try to unset atomically
      *pidx = (chunk_idx*MI_BFIELD_BITS) + (byte_idx*8);
      mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
      return true;
    }
    // try again
  }
#else
  size_t idx;
  for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    const mi_bfield_t x = chunk->bfields[i];
    // has_set8 has low bit in each byte set if the byte in x == 0xFF
    const mi_bfield_t has_set8 = ((~x - MI_BFIELD_LO_BIT8) &      // high bit set if byte in x is 0xFF or < 0x7F
                                  (x  & MI_BFIELD_HI_BIT8))       // high bit set if byte in x is >= 0x80
                                  >> 7;                           // shift high bit to low bit
    size_t idx;
    if mi_unlikely(mi_bfield_find_least_bit(has_set8,&idx)) { // find least 1-bit
      mi_assert_internal(idx <= (MI_BFIELD_BITS - 8));
      mi_assert_internal((idx%8)==0);
      const size_t byte_idx = idx/8;
      if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[i],byte_idx)) {  // unset the byte atomically
        *pidx = (i*MI_BFIELD_BITS) + idx;
        mi_assert_internal(*pidx + 8 <= MI_BITMAP_CHUNK_BITS);
        return true;
      }
      // else continue
    }
  }
  return false;
#endif
}


// find a sequence of `n` bits in a chunk with all `n` bits set, and try unset it atomically
// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
// todo: try avx2 and neon version
// todo: allow spanning across bfield boundaries?
static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
  if (n == 0 || n > MI_BFIELD_BITS) return false;  // TODO: allow larger?
  const mi_bfield_t mask = (n==MI_BFIELD_BITS ? ~((mi_bfield_t)0) : (((mi_bfield_t)1) << n)-1);
  for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    mi_bfield_t b = chunk->bfields[i];
    size_t bshift = 0;
    size_t idx;
    while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
      b >>= idx;
      bshift += idx;
      if (bshift + n >= MI_BFIELD_BITS) break;

      if ((b&mask) == mask) {  // found a match
        mi_assert_internal( ((mask << bshift) >> bshift) == mask );
        if mi_likely(mi_bfield_atomic_try_xset_mask(MI_BIT_CLEAR,&chunk->bfields[i],mask<<bshift)) {
          *pidx = (i*MI_BFIELD_BITS) + bshift;
          mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
          mi_assert_internal(*pidx + n <= MI_BITMAP_CHUNK_BITS);
          return true;
        }
        else {
          // if failed to atomically commit, try again from this position
          b = (chunk->bfields[i] >> bshift);
        }
      }
      else {
        // success, we claimed the bits!
        *bitmap_idx = mi_bitmap_index_create(idx, bitidx);
        return true;
        // advance
        const size_t ones = mi_bfield_ctz(~b);      // skip all ones (since it didn't fit the mask)
        mi_assert_internal(ones>0);
        bshift += ones;
        b >>= ones;
      }
    }
  }
  return false;
}


// are all bits in a bitmap chunk set?
static bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
  const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
  return _mm256_test_all_ones(vec);
#else
  // written like this for vectorization
  mi_bfield_t x = chunk->bfields[0];
  for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    x = x & chunk->bfields[i];
  }
  return (~x == 0);
#endif
}

// are all bits in a bitmap chunk clear?
static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
  const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
  return _mm256_testz_si256( vec, vec );
#else
  // written like this for vectorization
  mi_bfield_t x = chunk->bfields[0];
  for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
    x = x | chunk->bfields[i];
  }
  return (x == 0);
#endif
}

/* --------------------------------------------------------------------------------
  bitmap
-------------------------------------------------------------------------------- */
// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) {
  if (!already_zero) {
    _mi_memzero_aligned(bitmap, sizeof(*bitmap));
  }
}

// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
  mi_assert_internal(n>0);
  mi_assert_internal(idx + n<=MI_BITMAP_MAX_BITS);

  // first chunk
  size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  size_t m = MI_BITMAP_CHUNK_BITS - cidx;
  if (m > n) { m = n; }
  bool already_xset;
  mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m, &already_xset);

  // n can be large so use memset for efficiency for all in-between chunks
  chunk_idx++;
  n -= m;
  const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS;
  if (mid_chunks > 0) {
    _mi_memset(&bitmap->chunks[chunk_idx], (set ? ~0 : 0), MI_BITMAP_CHUNK_BITS/8);
    chunk_idx += mid_chunks;
    n -= mid_chunks * MI_BITMAP_CHUNK_BITS;
  }

  // last chunk
  if (n > 0) {
    mi_assert_internal(n < MI_BITMAP_CHUNK_BITS);
    mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
    mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], 0, n, &already_xset);
  }
}


// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0),
// and false otherwise leaving the bitmask as is.
bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
  mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  return mi_bitmap_chunk_try_xset( set, &bitmap->chunks[chunk_idx], cidx);
}

// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is.
bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
  mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
  mi_assert_internal(idx%8 == 0);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t byte_idx  = (idx % MI_BITMAP_CHUNK_BITS)/8;
  return mi_bitmap_chunk_try_xset8( set, &bitmap->chunks[chunk_idx],byte_idx);
}

// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
// and false otherwise leaving the bitmask as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
  mi_assert_internal(n>0);
  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
  if (n==1) { return mi_bitmap_try_xset(set,bitmap,idx); }
  if (n==8) { return mi_bitmap_try_xset8(set,bitmap,idx); }

  mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
  if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia
  return mi_bitmap_chunk_try_xsetN( set, &bitmap->chunks[chunk_idx], cidx, n);
}

// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset) {
  mi_assert_internal(n>0);
  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
  bool local_already_xset;
  if (already_xset==NULL) { already_xset = &local_already_xset; }
  // if (n==1) { return mi_bitmap_xset(set, bitmap, idx); }
  // if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); }

  mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
  if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia
  return mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n, already_xset);
}

// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
  mi_assert_internal(n>0);
  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
  mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
  mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
  if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia
  return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
}

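// (Illustrative sketch, not part of this diff: one plausible way the chunked API above can be
//  exercised by a caller. The variable `bm`, the indices, and MI_BIT_SET are assumptions here;
//  only the function signatures come from the code above.)
//
//   mi_bitmap_t bm;
//   mi_bitmap_init(&bm, false /*not already zero*/);      // start with every bit cleared
//   if (mi_bitmap_try_xsetN(MI_BIT_SET, &bm, 64, 8)) {
//     // bits 64..71 transitioned from 0 to 1 atomically, i.e. the claim succeeded
//     mi_bitmap_xsetN(MI_BIT_CLEAR, &bm, 64, 8, NULL);    // release them again (back to 0)
//   }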
#define mi_bitmap_forall_set_chunks(bitmap,start,decl_chunk_idx) \
|
||||
{ size_t _set_idx; \
|
||||
size_t _start = start % MI_BFIELD_BITS; \
|
||||
mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \
|
||||
while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \
|
||||
decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS;
|
||||
|
||||
#define mi_bitmap_forall_set_chunks_end() \
|
||||
_start += _set_idx+1; /* so chunk_idx stays valid */ \
|
||||
_any_set >>= _set_idx; /* skip scanned bits (and avoid UB with (idx+1)) */ \
|
||||
_any_set >>= 1; \
|
||||
} \
|
||||
}
|
||||
|
||||
// Find a set bit in a bitmap and atomically unset it. Returns true on success,
|
||||
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
|
||||
// The low `MI_BFIELD_BITS` of start are used to set the start point of the search
|
||||
// (to reduce thread contention).
|
||||
bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start) {
|
||||
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
|
||||
{
|
||||
size_t cidx;
|
||||
if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) {
|
||||
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
|
||||
mi_assert_internal(*pidx < MI_BITMAP_MAX_BITS);
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
// on to the next bit range
|
||||
#if MI_HAS_FAST_BITSCAN
|
||||
mi_assert_internal(mapm != 0);
|
||||
const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
|
||||
mi_assert_internal(shift > 0 && shift <= count);
|
||||
#else
|
||||
const size_t shift = 1;
|
||||
#endif
|
||||
bitidx += shift;
|
||||
m <<= shift;
|
||||
}
|
||||
}
|
||||
// no bits found
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
|
||||
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
|
||||
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
|
||||
size_t idx = start_field_idx;
|
||||
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
|
||||
if (idx >= bitmap_fields) { idx = 0; } // wrap
|
||||
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 0 atomically
|
||||
// Returns `true` if all `count` bits were 1 previously.
|
||||
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
const size_t idx = mi_bitmap_index_field(bitmap_idx);
|
||||
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
|
||||
const size_t mask = mi_bitmap_mask_(count, bitidx);
|
||||
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
|
||||
// mi_assert_internal((bitmap[idx] & mask) == mask);
|
||||
const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
|
||||
return ((prev & mask) == mask);
|
||||
}
|
||||
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 1 atomically
|
||||
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
|
||||
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
|
||||
const size_t idx = mi_bitmap_index_field(bitmap_idx);
|
||||
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
|
||||
const size_t mask = mi_bitmap_mask_(count, bitidx);
|
||||
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
|
||||
//mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
|
||||
size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
|
||||
if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); }
|
||||
return ((prev & mask) == 0);
|
||||
}
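To make the claim/unclaim contract above concrete, a minimal hedged sketch follows; the two-field bitmap and the chosen index are illustrative only and not taken from the source:

  // sketch: claim 4 bits at field 0, bit 8, then release them again
  static mi_bitmap_field_t fields[2];                       // zero-initialized
  const mi_bitmap_index_t bidx = mi_bitmap_index_create(0, 8);
  bool any_zero = false;
  bool was_all_zero = _mi_bitmap_claim(fields, 2, 4, bidx, &any_zero);  // true; any_zero==true
  bool was_all_one  = _mi_bitmap_unclaim(fields, 2, 4, bidx);           // true: we just set them
  mi_assert_internal(was_all_zero && was_all_one);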
|
||||
|
||||
// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one.
|
||||
static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) {
|
||||
const size_t idx = mi_bitmap_index_field(bitmap_idx);
|
||||
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
|
||||
const size_t mask = mi_bitmap_mask_(count, bitidx);
|
||||
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
|
||||
const size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
|
||||
if (any_ones != NULL) { *any_ones = ((field & mask) != 0); }
|
||||
return ((field & mask) == mask);
|
||||
}
|
||||
|
||||
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
|
||||
// Returns `true` if successful when all previous `count` bits were 0.
|
||||
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
const size_t idx = mi_bitmap_index_field(bitmap_idx);
|
||||
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
|
||||
const size_t mask = mi_bitmap_mask_(count, bitidx);
|
||||
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
|
||||
size_t expected = mi_atomic_load_relaxed(&bitmap[idx]);
|
||||
do {
|
||||
if ((expected & mask) != 0) return false;
|
||||
}
|
||||
while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask));
|
||||
mi_assert_internal((expected & mask) == 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL);
|
||||
}
|
||||
|
||||
bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
bool any_ones;
|
||||
mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
|
||||
return any_ones;
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// the `_across` functions work on bitmaps where sequences can cross over
|
||||
// between the fields. This is used in arena allocation
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
// Try to atomically claim a sequence of `count` bits starting from the field
|
||||
// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
|
||||
// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
|
||||
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
|
||||
{
|
||||
mi_assert_internal(bitmap_idx != NULL);
|
||||
|
||||
// check initial trailing zeros
|
||||
mi_bitmap_field_t* field = &bitmap[idx];
|
||||
size_t map = mi_atomic_load_relaxed(field);
|
||||
const size_t initial = mi_clz(map); // count of initial zeros starting at idx
|
||||
mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
|
||||
if (initial == 0) return false;
|
||||
if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us)
|
||||
if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
|
||||
|
||||
// scan ahead
|
||||
size_t found = initial;
|
||||
size_t mask = 0; // mask bits for the final field
|
||||
while(found < count) {
|
||||
field++;
|
||||
map = mi_atomic_load_relaxed(field);
|
||||
const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
|
||||
mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS);
|
||||
mask = mi_bitmap_mask_(mask_bits, 0);
|
||||
if ((map & mask) != 0) return false; // some part is already claimed
|
||||
found += mask_bits;
|
||||
}
|
||||
mi_assert_internal(field < &bitmap[bitmap_fields]);
|
||||
|
||||
// we found a range of contiguous zeros up to the final field; mask contains mask in the final field
|
||||
// now try to claim the range atomically
|
||||
mi_bitmap_field_t* const final_field = field;
|
||||
const size_t final_mask = mask;
|
||||
mi_bitmap_field_t* const initial_field = &bitmap[idx];
|
||||
const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial;
|
||||
const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx);
|
||||
|
||||
// initial field
|
||||
size_t newmap;
|
||||
field = initial_field;
|
||||
map = mi_atomic_load_relaxed(field);
|
||||
do {
|
||||
newmap = (map | initial_mask);
|
||||
if ((map & initial_mask) != 0) { goto rollback; };
|
||||
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
|
||||
|
||||
// intermediate fields
|
||||
while (++field < final_field) {
|
||||
newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
|
||||
map = 0;
|
||||
if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; }
|
||||
}
|
||||
|
||||
// final field
|
||||
mi_assert_internal(field == final_field);
|
||||
map = mi_atomic_load_relaxed(field);
|
||||
do {
|
||||
newmap = (map | final_mask);
|
||||
if ((map & final_mask) != 0) { goto rollback; }
|
||||
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
|
||||
|
||||
// claimed!
|
||||
mi_stat_counter_increase(stats->arena_crossover_count,1);
|
||||
*bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
|
||||
return true;
|
||||
|
||||
rollback:
|
||||
// roll back intermediate fields
|
||||
// (we just failed to claim `field` so decrement first)
|
||||
while (--field > initial_field) {
|
||||
newmap = 0;
|
||||
map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
|
||||
mi_assert_internal(mi_atomic_load_relaxed(field) == map);
|
||||
mi_atomic_store_release(field, newmap);
|
||||
}
|
||||
if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`)
|
||||
map = mi_atomic_load_relaxed(field);
|
||||
do {
|
||||
mi_assert_internal((map & initial_mask) == initial_mask);
|
||||
newmap = (map & ~initial_mask);
|
||||
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
|
||||
}
|
||||
mi_stat_counter_increase(stats->arena_rollback_count,1);
|
||||
// retry? (we make a recursive call instead of goto to be able to use const declarations)
|
||||
if (retries <= 2) {
|
||||
return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx, stats);
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
|
||||
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
|
||||
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) {
|
||||
mi_assert_internal(count > 0);
|
||||
if (count <= 2) {
|
||||
// we don't bother with crossover fields for small counts
|
||||
return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
|
||||
}
|
||||
|
||||
// visit the fields
|
||||
size_t idx = start_field_idx;
|
||||
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
|
||||
if (idx >= bitmap_fields) { idx = 0; } // wrap
|
||||
// first try to claim inside a field
|
||||
/*
|
||||
if (count <= MI_BITMAP_FIELD_BITS) {
|
||||
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
|
||||
        return true;
      }
    }
    */
    // if that fails, then try to claim across fields
    if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx, stats)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
// Helper for masks across fields; returns the mid count, post_mask may be 0
|
||||
static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) {
|
||||
MI_UNUSED(bitmap_fields);
|
||||
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
|
||||
if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) {
|
||||
*pre_mask = mi_bitmap_mask_(count, bitidx);
|
||||
*mid_mask = 0;
|
||||
*post_mask = 0;
|
||||
mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields);
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx;
|
||||
mi_assert_internal(pre_bits < count);
|
||||
*pre_mask = mi_bitmap_mask_(pre_bits, bitidx);
|
||||
count -= pre_bits;
|
||||
const size_t mid_count = (count / MI_BITMAP_FIELD_BITS);
|
||||
*mid_mask = MI_BITMAP_FIELD_FULL;
|
||||
count %= MI_BITMAP_FIELD_BITS;
|
||||
*post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0));
|
||||
mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields);
|
||||
    return mid_count;
  }
}

// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
|
||||
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
|
||||
bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ) {
|
||||
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
|
||||
{
|
||||
size_t cidx;
|
||||
if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) {
|
||||
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
|
||||
mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-8);
|
||||
mi_assert_internal((*pidx % 8) == 0);
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
|
||||
mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
mi_bitmap_forall_set_chunks_end();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 0 atomically
|
||||
// Returns `true` if all `count` bits were 1 previously.
|
||||
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
size_t idx = mi_bitmap_index_field(bitmap_idx);
|
||||
size_t pre_mask;
|
||||
size_t mid_mask;
|
||||
size_t post_mask;
|
||||
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
|
||||
bool all_one = true;
|
||||
mi_bitmap_field_t* field = &bitmap[idx];
|
||||
size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part
|
||||
if ((prev & pre_mask) != pre_mask) all_one = false;
|
||||
while(mid_count-- > 0) {
|
||||
prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part
|
||||
    if ((prev & mid_mask) != mid_mask) all_one = false;
  }
  if (post_mask!=0) {
    prev = mi_atomic_and_acq_rel(field, ~post_mask);  // clear end part
    if ((prev & post_mask) != post_mask) all_one = false;
  }
  return all_one;
}

// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ) {
  // TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger
  // TODO: allow spanning across chunk boundaries
  if (n == 0 || n > MI_BFIELD_BITS) return false;
  mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
  {
    size_t cidx;
    if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) {
      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
      mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-n);
      return true;
    }
    else {
      if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
        mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
      }
    }
  }
  mi_bitmap_forall_set_chunks_end();
  return false;
}
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 1 atomically
|
||||
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
|
||||
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
|
||||
size_t idx = mi_bitmap_index_field(bitmap_idx);
|
||||
size_t pre_mask;
|
||||
size_t mid_mask;
|
||||
size_t post_mask;
|
||||
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
|
||||
bool all_zero = true;
|
||||
bool any_zero = false;
|
||||
_Atomic(size_t)*field = &bitmap[idx];
|
||||
size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
|
||||
if ((prev & pre_mask) != 0) all_zero = false;
|
||||
if ((prev & pre_mask) != pre_mask) any_zero = true;
|
||||
while (mid_count-- > 0) {
|
||||
prev = mi_atomic_or_acq_rel(field++, mid_mask);
|
||||
if ((prev & mid_mask) != 0) all_zero = false;
|
||||
if ((prev & mid_mask) != mid_mask) any_zero = true;
|
||||
}
|
||||
if (post_mask!=0) {
|
||||
prev = mi_atomic_or_acq_rel(field, post_mask);
|
||||
if ((prev & post_mask) != 0) all_zero = false;
|
||||
if ((prev & post_mask) != post_mask) any_zero = true;
|
||||
}
|
||||
if (pany_zero != NULL) { *pany_zero = any_zero; }
|
||||
return all_zero;
|
||||
}
|
||||
|
||||
|
||||
// Returns `true` if all `count` bits were 1.
|
||||
// `any_ones` is `true` if there was at least one bit set to one.
|
||||
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
|
||||
size_t idx = mi_bitmap_index_field(bitmap_idx);
|
||||
size_t pre_mask;
|
||||
size_t mid_mask;
|
||||
size_t post_mask;
|
||||
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
|
||||
bool all_ones = true;
|
||||
bool any_ones = false;
|
||||
mi_bitmap_field_t* field = &bitmap[idx];
|
||||
size_t prev = mi_atomic_load_relaxed(field++);
|
||||
if ((prev & pre_mask) != pre_mask) all_ones = false;
|
||||
if ((prev & pre_mask) != 0) any_ones = true;
|
||||
while (mid_count-- > 0) {
|
||||
prev = mi_atomic_load_relaxed(field++);
|
||||
if ((prev & mid_mask) != mid_mask) all_ones = false;
|
||||
if ((prev & mid_mask) != 0) any_ones = true;
|
||||
}
|
||||
if (post_mask!=0) {
|
||||
prev = mi_atomic_load_relaxed(field);
|
||||
if ((prev & post_mask) != post_mask) all_ones = false;
|
||||
if ((prev & post_mask) != 0) any_ones = true;
|
||||
}
|
||||
if (pany_ones != NULL) { *pany_ones = any_ones; }
|
||||
return all_ones;
|
||||
}
|
||||
|
||||
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL);
|
||||
}
|
||||
|
||||
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
bool any_ones;
|
||||
  mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
  return any_ones;
}
|
||||
|
|
154
src/bitmap.h
|
@ -6,105 +6,87 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
-----------------------------------------------------------------------------*/
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Concurrent bitmap that can set/reset sequences of bits atomically,
|
||||
represented as an array of fields where each field is a machine word (`size_t`)
|
||||
|
||||
There are two api's; the standard one cannot have sequences that cross
|
||||
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
|
||||
(this is used in region allocation)
|
||||
|
||||
The `_across` postfixed functions do allow sequences that can cross over
|
||||
between the fields. (This is used in arena allocation)
|
||||
Concurrent bitmap that can set/reset sequences of bits atomically
|
||||
---------------------------------------------------------------------------- */
|
||||
#pragma once
|
||||
#ifndef MI_BITMAP_H
|
||||
#define MI_BITMAP_H
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Bitmap definition
|
||||
----------------------------------------------------------- */
|
||||
/* --------------------------------------------------------------------------------
|
||||
Definitions
|
||||
-------------------------------------------------------------------------------- */
|
||||
|
||||
#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE)
|
||||
#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set
|
||||
typedef size_t mi_bfield_t;
|
||||
|
||||
// An atomic bitmap of `size_t` fields
|
||||
typedef _Atomic(size_t) mi_bitmap_field_t;
|
||||
typedef mi_bitmap_field_t* mi_bitmap_t;
|
||||
#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3)
|
||||
#define MI_BFIELD_BITS (1 << MI_BFIELD_BITS_SHIFT)
|
||||
#define MI_BFIELD_SIZE (MI_BFIELD_BITS/8)
|
||||
#define MI_BFIELD_BITS_MOD_MASK (MI_BFIELD_BITS - 1)
|
||||
#define MI_BFIELD_LO_BIT8 ((~(mi_bfield_t(0)))/0xFF) // 0x01010101 ..
|
||||
#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 ..
|
||||
|
||||
// A bitmap index is the index of the bit in a bitmap.
|
||||
typedef size_t mi_bitmap_index_t;
|
||||
#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
|
||||
#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1)
|
||||
|
||||
// Create a bit index.
|
||||
static inline mi_bitmap_index_t mi_bitmap_index_create_ex(size_t idx, size_t bitidx) {
|
||||
mi_assert_internal(bitidx <= MI_BITMAP_FIELD_BITS);
|
||||
return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
|
||||
}
|
||||
static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
|
||||
mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
|
||||
return mi_bitmap_index_create_ex(idx,bitidx);
|
||||
}
|
||||
|
||||
// Get the field index from a bit index.
|
||||
static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
|
||||
return (bitmap_idx / MI_BITMAP_FIELD_BITS);
|
||||
}
|
||||
|
||||
// Get the bit index in a bitmap field
|
||||
static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
|
||||
return (bitmap_idx % MI_BITMAP_FIELD_BITS);
|
||||
}
|
||||
|
||||
// Get the full bit index
|
||||
static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
|
||||
return bitmap_idx;
|
||||
}
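A quick hedged round-trip through these index helpers (the concrete numbers are purely illustrative):

  // on a 64-bit build: field 3, bit 17  ->  bitmap_idx == 3*64 + 17 == 209
  const mi_bitmap_index_t bidx = mi_bitmap_index_create(3, 17);
  mi_assert_internal(mi_bitmap_index_field(bidx) == 3);
  mi_assert_internal(mi_bitmap_index_bit_in_field(bidx) == 17);
  mi_assert_internal(mi_bitmap_index_bit(bidx) == 209);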
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Claim a bit sequence atomically
|
||||
----------------------------------------------------------- */
|
||||
|
||||
// Try to atomically claim a sequence of `count` bits in a single
|
||||
// field at `idx` in `bitmap`. Returns `true` on success.
|
||||
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
|
||||
|
||||
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
|
||||
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
|
||||
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 0 atomically
|
||||
// Returns `true` if all `count` bits were 1 previously.
|
||||
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
|
||||
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
|
||||
// Returns `true` if successful when all previous `count` bits were 0.
|
||||
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 1 atomically
|
||||
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
|
||||
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero);
|
||||
|
||||
bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
typedef mi_decl_align(32) struct mi_bitmap_chunk_s {
|
||||
_Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
|
||||
} mi_bitmap_chunk_t;
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// the `_across` functions work on bitmaps where sequences can cross over
|
||||
// between the fields. This is used in arena allocation
|
||||
//--------------------------------------------------------------------------
|
||||
typedef mi_decl_align(32) struct mi_bitmap_s {
|
||||
mi_bitmap_chunk_t chunks[MI_BFIELD_BITS];
|
||||
_Atomic(mi_bfield_t)any_set;
|
||||
} mi_bitmap_t;
|
||||
|
||||
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
|
||||
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
|
||||
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats);
|
||||
#define MI_BITMAP_MAX_BITS (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS) // 16k bits on 64bit, 8k bits on 32bit
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 0 atomically
|
||||
// Returns `true` if all `count` bits were 1 previously.
|
||||
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
/* --------------------------------------------------------------------------------
|
||||
Bitmap
|
||||
-------------------------------------------------------------------------------- */
|
||||
|
||||
// Set `count` bits at `bitmap_idx` to 1 atomically
|
||||
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
|
||||
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);
|
||||
typedef bool mi_bit_t;
|
||||
#define MI_BIT_SET (true)
|
||||
#define MI_BIT_CLEAR (false)
|
||||
|
||||
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
|
||||
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero);
|
||||
|
||||
#endif
|
||||
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
|
||||
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
|
||||
|
||||
// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's).
|
||||
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
|
||||
// If `already_xset` is not NULL, it is set to true if all the bits were already all set/cleared.
|
||||
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset);
|
||||
|
||||
// Is a sequence of n bits already all set/cleared?
|
||||
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
|
||||
|
||||
// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
|
||||
// and false otherwise leaving the bitmask as is.
|
||||
mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
|
||||
|
||||
// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
|
||||
// and false otherwise leaving the bitmask as is.
|
||||
mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
|
||||
|
||||
// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
|
||||
// and false otherwise leaving the bitmask as is.
|
||||
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
|
||||
mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
|
||||
|
||||
// Find a set bit in a bitmap and atomically unset it. Returns true on success,
|
||||
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
|
||||
// The low `MI_BFIELD_BITS` of start are used to set the start point of the search
|
||||
// (to reduce thread contention).
|
||||
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start);
|
||||
|
||||
// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
|
||||
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
|
||||
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx );
|
||||
|
||||
// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
|
||||
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
|
||||
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx );
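Putting the new chunked interface together, a minimal usage sketch might look like this; the static bitmap instance and the chosen sizes are assumptions for illustration, not code from this commit:

  static mi_bitmap_t bitmap;                                  // zero-initialized storage
  mi_bitmap_init(&bitmap, true /* already_zero */);
  mi_bitmap_xsetN(MI_BIT_SET, &bitmap, 0, 64, NULL);          // set bits 0..63 (stays within one chunk)
  size_t idx;
  if (mi_bitmap_try_find_and_clearN(&bitmap, 0 /* start hint */, 8, &idx)) {
    // found and cleared 8 contiguous set bits starting at `idx`
    mi_bitmap_xsetN(MI_BIT_SET, &bitmap, idx, 8, NULL);       // set them again
  }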
|
||||
|
||||
#endif // MI_XBITMAP_H
|
||||
|
|
118
src/free.c
|
@ -24,7 +24,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block);
|
|||
// ------------------------------------------------------
|
||||
|
||||
// forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
|
||||
static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block);
|
||||
static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_block_t* block);
|
||||
|
||||
// regular free of a (thread local) block pointer
|
||||
// fast path written carefully to prevent spilling on the stack
|
||||
|
@ -57,7 +57,7 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
|
|||
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) {
|
||||
mi_assert_internal(page!=NULL && p!=NULL);
|
||||
|
||||
size_t diff = (uint8_t*)p - page->page_start;
|
||||
size_t diff = (uint8_t*)p - mi_page_start(page);
|
||||
size_t adjust;
|
||||
if mi_likely(page->block_size_shift != 0) {
|
||||
adjust = diff & (((size_t)1 << page->block_size_shift) - 1);
|
||||
|
@ -82,72 +82,55 @@ static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, vo
|
|||
#endif
|
||||
|
||||
// free a local pointer (page parameter comes first for better codegen)
|
||||
static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
|
||||
MI_UNUSED(segment);
|
||||
static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, void* p) mi_attr_noexcept {
|
||||
mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(page, p) : (mi_block_t*)p);
|
||||
mi_block_check_unguard(page, block, p);
|
||||
mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */);
|
||||
}
|
||||
|
||||
// free a pointer owned by another thread (page parameter comes first for better codegen)
|
||||
static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
|
||||
static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, void* p) mi_attr_noexcept {
|
||||
mi_block_t* const block = _mi_page_ptr_unalign(page, p); // don't check `has_aligned` flag to avoid a race (issue #865)
|
||||
mi_block_check_unguard(page, block, p);
|
||||
mi_free_block_mt(page, segment, block);
|
||||
mi_free_block_mt(page, block);
|
||||
}
|
||||
|
||||
// generic free (for runtime integration)
|
||||
void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
|
||||
if (is_local) mi_free_generic_local(page,segment,p);
|
||||
else mi_free_generic_mt(page,segment,p);
|
||||
void mi_decl_noinline _mi_free_generic(mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
|
||||
if (is_local) mi_free_generic_local(page,p);
|
||||
else mi_free_generic_mt(page,p);
|
||||
}
|
||||
|
||||
// Get the segment data belonging to a pointer
|
||||
// This is just a single `and` in release mode but does further checks in debug mode
|
||||
// (and secure mode) to see if this was a valid pointer.
|
||||
static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
|
||||
static inline mi_page_t* mi_checked_ptr_page(const void* p, const char* msg)
|
||||
{
|
||||
MI_UNUSED(msg);
|
||||
|
||||
#if (MI_DEBUG>0)
|
||||
MI_UNUSED_RELEASE(msg);
|
||||
#if MI_DEBUG
|
||||
if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0 && !mi_option_is_enabled(mi_option_guarded_precise)) {
|
||||
_mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
mi_segment_t* const segment = _mi_ptr_segment(p);
|
||||
if mi_unlikely(segment==NULL) return segment;
|
||||
|
||||
#if (MI_DEBUG>0)
|
||||
if mi_unlikely(!mi_is_in_heap_region(p)) {
|
||||
_mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
|
||||
"(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
|
||||
if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) {
|
||||
_mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
|
||||
}
|
||||
mi_page_t* const page = _mi_ptr_page(p);
|
||||
#if MI_DEBUG
|
||||
if (page == MI_PAGE_PTR_INVALID) {
|
||||
_mi_error_message(EINVAL, "%s: invalid pointer: %p\n", msg, p);
|
||||
}
|
||||
#endif
|
||||
#if (MI_DEBUG>0 || MI_SECURE>=4)
|
||||
if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) {
|
||||
_mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
return segment;
|
||||
return page;
|
||||
}
|
||||
|
||||
// Free a block
|
||||
// Fast path written carefully to prevent register spilling on the stack
|
||||
void mi_free(void* p) mi_attr_noexcept
|
||||
{
|
||||
mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
|
||||
if mi_unlikely(segment==NULL) return;
|
||||
|
||||
const bool is_local = (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
|
||||
mi_page_t* const page = _mi_segment_page_of(segment, p);
|
||||
mi_page_t* const page = mi_checked_ptr_page(p,"mi_free");
|
||||
if mi_unlikely(page==NULL) return;
|
||||
|
||||
|
||||
const bool is_local = (_mi_prim_thread_id() == mi_page_thread_id(page));
|
||||
if mi_likely(is_local) { // thread-local free?
|
||||
if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
|
||||
// thread-local, aligned, and not a full page
|
||||
|
@ -156,12 +139,12 @@ void mi_free(void* p) mi_attr_noexcept
|
|||
}
|
||||
else {
|
||||
// page is full or contains (inner) aligned blocks; use generic path
|
||||
mi_free_generic_local(page, segment, p);
|
||||
mi_free_generic_local(page, p);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// not thread-local; use generic path
|
||||
mi_free_generic_mt(page, segment, p);
|
||||
mi_free_generic_mt(page, p);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -169,10 +152,8 @@ void mi_free(void* p) mi_attr_noexcept
|
|||
bool _mi_free_delayed_block(mi_block_t* block) {
|
||||
// get segment and page
|
||||
mi_assert_internal(block!=NULL);
|
||||
const mi_segment_t* const segment = _mi_ptr_segment(block);
|
||||
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
|
||||
mi_assert_internal(_mi_thread_id() == segment->thread_id);
|
||||
mi_page_t* const page = _mi_segment_page_of(segment, block);
|
||||
mi_page_t* const page = mi_checked_ptr_page(block,"_mi_free_delayed_block");
|
||||
mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page));
|
||||
|
||||
// Clear the no-delayed flag so delayed freeing is used again for this page.
|
||||
// This must be done before collecting the free lists on this page -- otherwise
|
||||
|
@ -242,20 +223,19 @@ static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block
|
|||
}
|
||||
|
||||
// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
|
||||
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block)
|
||||
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block)
|
||||
{
|
||||
// first see if the segment was abandoned and if we can reclaim it into our thread
|
||||
if (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 &&
|
||||
#if MI_HUGE_PAGE_ABANDON
|
||||
segment->page_kind != MI_PAGE_HUGE &&
|
||||
#endif
|
||||
mi_atomic_load_relaxed(&segment->thread_id) == 0 && // segment is abandoned?
|
||||
// first see if the page was abandoned and if we can reclaim it into our thread
|
||||
if (mi_page_is_abandoned(page) &&
|
||||
(_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 ||
|
||||
mi_page_is_singleton(page) // only one block, and we are free-ing it
|
||||
) &&
|
||||
mi_prim_get_default_heap() != (mi_heap_t*)&_mi_heap_empty) // and we did not already exit this thread (without this check, a fresh heap will be initialized (issue #944))
|
||||
{
|
||||
// the segment is abandoned, try to reclaim it into our heap
|
||||
if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) {
|
||||
mi_assert_internal(_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
|
||||
mi_assert_internal(mi_heap_get_default()->tld->segments.subproc == segment->subproc);
|
||||
// the page is abandoned, try to reclaim it into our heap
|
||||
if (_mi_heap_try_reclaim(mi_heap_get_default(), page)) { // TODO: avoid putting it in the full free queue
|
||||
mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page));
|
||||
// mi_assert_internal(mi_heap_get_default()->tld->subproc == page->subproc);
|
||||
mi_free(block); // recursively free as now it will be a local free in our heap
|
||||
return;
|
||||
}
|
||||
|
@ -272,17 +252,12 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* seg
|
|||
// for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
|
||||
_mi_padding_shrink(page, block, sizeof(mi_block_t));
|
||||
|
||||
if (segment->page_kind == MI_PAGE_HUGE) {
|
||||
#if MI_HUGE_PAGE_ABANDON
|
||||
// huge page segments are always abandoned and can be freed immediately
|
||||
_mi_segment_huge_page_free(segment, page, block);
|
||||
return;
|
||||
#else
|
||||
if (mi_page_is_huge(page)) {
|
||||
mi_assert_internal(mi_page_is_singleton(page));
|
||||
// huge pages are special as they occupy the entire segment
|
||||
// as these are large we reset the memory occupied by the page so it is available to other threads
|
||||
// (as the owning thread needs to actually free the memory later).
|
||||
_mi_segment_huge_page_reset(segment, page, block);
|
||||
#endif
|
||||
_mi_os_reset(mi_page_start(page), mi_page_block_size(page), NULL); // resets conservatively
|
||||
}
|
||||
else {
|
||||
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
|
||||
|
@ -316,9 +291,8 @@ static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* p
|
|||
}
|
||||
|
||||
static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
|
||||
const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
|
||||
if mi_unlikely(segment==NULL) return 0;
|
||||
const mi_page_t* const page = _mi_segment_page_of(segment, p);
|
||||
const mi_page_t* const page = mi_checked_ptr_page(p,msg);
|
||||
if mi_unlikely(page==NULL) return 0;
|
||||
if mi_likely(!mi_page_has_aligned(page)) {
|
||||
const mi_block_t* block = (const mi_block_t*)p;
|
||||
return mi_page_usable_size_of(page, block);
|
||||
|
@ -514,20 +488,20 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
|
|||
// only maintain stats for smaller objects if requested
|
||||
#if (MI_STAT>0)
|
||||
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
|
||||
#if (MI_STAT < 2)
|
||||
#if (MI_STAT < 2)
|
||||
MI_UNUSED(block);
|
||||
#endif
|
||||
#endif
|
||||
mi_heap_t* const heap = mi_heap_get_default();
|
||||
const size_t bsize = mi_page_usable_block_size(page);
|
||||
#if (MI_STAT>1)
|
||||
#if (MI_STAT>1)
|
||||
const size_t usize = mi_page_usable_size_of(page, block);
|
||||
mi_heap_stat_decrease(heap, malloc, usize);
|
||||
#endif
|
||||
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
|
||||
#endif
|
||||
if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
|
||||
mi_heap_stat_decrease(heap, normal, bsize);
|
||||
#if (MI_STAT > 1)
|
||||
#if (MI_STAT > 1)
|
||||
mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
const size_t bpsize = mi_page_block_size(page); // match stat in page.c:mi_huge_page_alloc
|
||||
|
|
|
@ -7,11 +7,8 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
|
||||
#include "mimalloc.h"
|
||||
#include "mimalloc/internal.h"
|
||||
#include "mimalloc/atomic.h"
|
||||
#include "mimalloc/prim.h" // mi_prim_get_default_heap
|
||||
|
||||
#include <string.h> // memset, memcpy
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1920)
|
||||
#pragma warning(disable:4204) // non-constant aggregate initializer
|
||||
#endif
|
||||
|
@ -258,7 +255,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) {
|
|||
mi_assert_internal(heap != NULL);
|
||||
mi_assert_internal(mi_heap_is_initialized(heap));
|
||||
// TODO: copy full empty heap instead?
|
||||
memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
|
||||
_mi_memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
|
||||
_mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
|
||||
heap->thread_delayed_free = NULL;
|
||||
heap->page_count = 0;
|
||||
|
|
55
src/os.c
|
@ -59,6 +59,10 @@ size_t _mi_os_large_page_size(void) {
|
|||
return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
|
||||
}
|
||||
|
||||
size_t _mi_os_virtual_address_bits(void) {
|
||||
return mi_os_mem_config.virtual_address_bits;
|
||||
}
|
||||
|
||||
bool _mi_os_use_large_page(size_t size, size_t alignment) {
|
||||
// if we have access, check the size and alignment requirements
|
||||
if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false;
|
||||
|
@ -103,58 +107,10 @@ static void* mi_align_down_ptr(void* p, size_t alignment) {
|
|||
return (void*)_mi_align_down((uintptr_t)p, alignment);
|
||||
}
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
aligned hinting
|
||||
-------------------------------------------------------------- */
|
||||
|
||||
// On systems with enough virtual address bits, we can do efficient aligned allocation by using
|
||||
// the 2TiB to 30TiB area to allocate those. If we have at least 46 bits of virtual address
|
||||
// space (64TiB) we use this technique. (but see issue #939)
|
||||
#if (MI_INTPTR_SIZE >= 8) && !defined(MI_NO_ALIGNED_HINT)
|
||||
static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;
|
||||
|
||||
// Return a MI_SEGMENT_SIZE aligned address that is probably available.
|
||||
// If this returns NULL, the OS will determine the address but on some OS's that may not be
|
||||
// properly aligned which can be more costly as it needs to be adjusted afterwards.
|
||||
// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
|
||||
// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
|
||||
// in the middle of the 2TiB - 6TiB address range (see issue #372))
|
||||
|
||||
#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start
|
||||
#define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes)
|
||||
#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
|
||||
|
||||
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size)
|
||||
{
|
||||
if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
|
||||
if (mi_os_mem_config.virtual_address_bits < 46) return NULL; // < 64TiB virtual address space
|
||||
size = _mi_align_up(size, MI_SEGMENT_SIZE);
|
||||
if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
|
||||
#if (MI_SECURE>0)
|
||||
size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
|
||||
#endif
|
||||
|
||||
uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
|
||||
if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize
|
||||
uintptr_t init = MI_HINT_BASE;
|
||||
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
|
||||
uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
|
||||
init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB
|
||||
#endif
|
||||
uintptr_t expected = hint + size;
|
||||
mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
|
||||
hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
|
||||
}
|
||||
if (hint%try_alignment != 0) return NULL;
|
||||
return (void*)hint;
|
||||
}
|
||||
#else
|
||||
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
|
||||
MI_UNUSED(try_alignment); MI_UNUSED(size);
|
||||
return NULL;
|
||||
}
|
||||
#endif
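As a rough numeric sanity check of the hinting scheme above (commentary only, not code from the commit):

  // MI_HINT_BASE = 2 TiB, MI_HINT_AREA = 4 TiB, MI_HINT_MAX = 30 TiB:
  //   random start = 2 TiB + (4 MiB * (20 random bits)) % 4 TiB  ->  somewhere in [2 TiB, 6 TiB)
  //   a request capped at 1 GiB inside a 4 TiB window hits any fixed address with
  //   probability at most 1 GiB / 4 TiB = 1/4096, matching the guarantee stated in the code above.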
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
|
@ -380,12 +336,10 @@ void* _mi_os_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
|
|||
----------------------------------------------------------- */
|
||||
|
||||
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats) {
|
||||
mi_assert(offset <= MI_SEGMENT_SIZE);
|
||||
mi_assert(offset <= size);
|
||||
mi_assert((alignment % _mi_os_page_size()) == 0);
|
||||
*memid = _mi_memid_none();
|
||||
if (stats == NULL) stats = &_mi_stats_main;
|
||||
if (offset > MI_SEGMENT_SIZE) return NULL;
|
||||
if (offset == 0) {
|
||||
// regular aligned allocation
|
||||
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, stats);
|
||||
|
@ -605,7 +559,6 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
|
|||
#endif
|
||||
}
|
||||
end = start + size;
|
||||
mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
|
||||
} while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));
|
||||
|
||||
if (total_size != NULL) *total_size = size;
|
||||
|
|
90
src/page-map.c
Normal file
|
@ -0,0 +1,90 @@
|
|||
/*----------------------------------------------------------------------------
|
||||
Copyright (c) 2023-2024, Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
#include "mimalloc.h"
|
||||
#include "mimalloc/internal.h"
|
||||
#include "bitmap.h"
|
||||
|
||||
mi_decl_cache_align signed char* _mi_page_map = NULL;
|
||||
static bool mi_page_map_all_committed = false;
|
||||
static size_t mi_blocks_per_commit_bit = 1;
|
||||
static mi_memid_t mi_page_map_memid;
|
||||
static mi_bitmap_t mi_page_map_commit;
|
||||
|
||||
static bool mi_page_map_init(void) {
|
||||
size_t vbits = _mi_os_virtual_address_bits();
|
||||
if (vbits >= 48) vbits = 47;
|
||||
// 1 byte per block = 2 GiB for 128 TiB address space (48 bit = 256 TiB address space)
|
||||
// 64 KiB for 4 GiB address space (on 32-bit)
|
||||
const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_BLOCK_SHIFT));
|
||||
|
||||
const size_t min_commit_size = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);
|
||||
mi_blocks_per_commit_bit = mi_block_count_of_size(min_commit_size);
|
||||
|
||||
mi_page_map_all_committed = _mi_os_has_overcommit(); // commit on-access on Linux systems
|
||||
_mi_page_map = (int8_t*)_mi_os_alloc_aligned(page_map_size, 0, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
|
||||
if (_mi_page_map==NULL) {
|
||||
_mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
|
||||
return false;
|
||||
}
|
||||
if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) {
|
||||
_mi_warning_message("the page map was committed on-demand but not zero initialized!\n");
|
||||
_mi_memzero_aligned(_mi_page_map, page_map_size);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* block_count) {
|
||||
size_t page_size;
|
||||
*page_start = mi_page_area(page, &page_size);
|
||||
if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE; } // furthest interior pointer
|
||||
*block_count = mi_block_count_of_size(page_size);
|
||||
return ((uintptr_t)*page_start >> MI_ARENA_BLOCK_SHIFT);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void _mi_page_map_register(mi_page_t* page) {
|
||||
if mi_unlikely(_mi_page_map == NULL) {
|
||||
if (!mi_page_map_init()) return;
|
||||
}
|
||||
uint8_t* page_start;
|
||||
size_t block_count;
|
||||
const size_t idx = mi_page_map_get_idx(page, &page_start, &block_count);
|
||||
|
||||
// is the page map area that contains the page address committed?
|
||||
if (!mi_page_map_all_committed) {
|
||||
const size_t commit_bit_count = _mi_divide_up(block_count, mi_blocks_per_commit_bit);
|
||||
const size_t commit_bit_idx = idx / mi_blocks_per_commit_bit;
|
||||
for (size_t i = 0; i < commit_bit_count; i++) { // per bit to avoid crossing over bitmap chunks
|
||||
if (mi_bitmap_is_xsetN(MI_BIT_CLEAR, &mi_page_map_commit, commit_bit_idx + i, 1)) {
|
||||
// this may race, in which case we do multiple commits (which is ok)
|
||||
_mi_os_commit(page_start + (i*mi_blocks_per_commit_bit*MI_ARENA_BLOCK_SIZE), mi_blocks_per_commit_bit* MI_ARENA_BLOCK_SIZE, NULL, NULL);
|
||||
mi_bitmap_xsetN(MI_BIT_SET, &mi_page_map_commit, commit_bit_idx + i, 1, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// set the offsets
|
||||
for (size_t i = 0; i < block_count; i++) {
|
||||
mi_assert_internal(i < 128);
|
||||
_mi_page_map[idx + i] = (int8_t)(-i-1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void _mi_page_map_unregister(mi_page_t* page) {
|
||||
mi_assert_internal(_mi_page_map != NULL);
|
||||
|
||||
// get index and count
|
||||
uint8_t* page_start;
|
||||
size_t block_count;
|
||||
const size_t idx = mi_page_map_get_idx(page, &page_start, &block_count);
|
||||
|
||||
// unset the offsets
|
||||
_mi_memzero(_mi_page_map + idx, block_count);
|
||||
}
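For context, the inverse lookup used by the free path (`_mi_ptr_page` in free.c) can be sketched from the encoding that `_mi_page_map_register` writes; the helper name and exact form below are assumptions, not part of this commit:

  // sketch: map an interior pointer back to its page using the byte offsets stored by
  // _mi_page_map_register (each entry holds -(distance in blocks)-1, so 0 still means
  // "not registered").
  static inline mi_page_t* mi_ptr_page_sketch(const void* p) {
    const uintptr_t  idx = ((uintptr_t)p >> MI_ARENA_BLOCK_SHIFT);
    const ptrdiff_t  ofs = _mi_page_map[idx];          // 0 if this block is not registered
    return (mi_page_t*)((idx + ofs + 1) << MI_ARENA_BLOCK_SHIFT);  // start block of the page
  }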
|
67
src/page.c
|
@ -59,7 +59,7 @@ static inline uint8_t* mi_page_area(const mi_page_t* page) {
|
|||
|
||||
static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
|
||||
size_t psize;
|
||||
uint8_t* page_area = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
|
||||
uint8_t* page_area = mi_page_area(page, &psize);
|
||||
mi_block_t* start = (mi_block_t*)page_area;
|
||||
mi_block_t* end = (mi_block_t*)(page_area + psize);
|
||||
while(p != NULL) {
|
||||
|
@ -83,10 +83,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
|
|||
mi_assert_internal(page->capacity <= page->reserved);
|
||||
|
||||
// const size_t bsize = mi_page_block_size(page);
|
||||
mi_segment_t* segment = _mi_page_segment(page);
|
||||
uint8_t* start = mi_page_start(page);
|
||||
mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL));
|
||||
mi_assert_internal(page->is_huge == (segment->page_kind == MI_PAGE_HUGE));
|
||||
//mi_assert_internal(start + page->capacity*page->block_size == page->top);
|
||||
|
||||
mi_assert_internal(mi_page_list_is_valid(page,page->free));
|
||||
|
@ -122,15 +119,11 @@ bool _mi_page_is_valid(mi_page_t* page) {
|
|||
mi_assert_internal(page->keys[0] != 0);
|
||||
#endif
|
||||
if (mi_page_heap(page)!=NULL) {
|
||||
mi_segment_t* segment = _mi_page_segment(page);
|
||||
mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0);
|
||||
#if MI_HUGE_PAGE_ABANDON
|
||||
if (segment->page_kind != MI_PAGE_HUGE)
|
||||
#endif
|
||||
mi_assert_internal(!_mi_process_is_initialized || page->thread_id == mi_page_heap(page)->thread_id || page->thread_id==0);
|
||||
{
|
||||
mi_page_queue_t* pq = mi_page_queue_of(page);
|
||||
mi_assert_internal(mi_page_queue_contains(pq, page));
|
||||
mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page));
|
||||
mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_MAX_OBJ_SIZE || mi_page_is_in_full(page));
|
||||
mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq));
|
||||
}
|
||||
}
|
||||
|
@ -274,16 +267,13 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
|
|||
#if !MI_HUGE_PAGE_ABANDON
|
||||
mi_assert_internal(pq != NULL);
|
||||
mi_assert_internal(mi_heap_contains_queue(heap, pq));
|
||||
mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_OBJ_SIZE_MAX || block_size == pq->block_size);
|
||||
mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_MAX_OBJ_SIZE || block_size == pq->block_size);
|
||||
#endif
|
||||
mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os);
|
||||
mi_page_t* page = _mi_heap_page_alloc(heap, block_size, page_alignment);
|
||||
if (page == NULL) {
|
||||
// this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
|
||||
return NULL;
|
||||
}
|
||||
#if MI_HUGE_PAGE_ABANDON
|
||||
mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
|
||||
#endif
|
||||
mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size);
|
||||
// a fresh page was found, initialize it
|
||||
const size_t full_block_size = (pq == NULL || mi_page_is_huge(page) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc
|
||||
|
@ -384,7 +374,6 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
|
|||
mi_heap_t* pheap = mi_page_heap(page);
|
||||
|
||||
// remove from our page list
mi_segments_tld_t* segments_tld = &pheap->tld->segments;
mi_page_queue_remove(pq, page);

// page is no longer associated with our heap

@@ -399,8 +388,8 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
#endif

// and abandon it
mi_assert_internal(mi_page_heap(page) == NULL);
_mi_segment_page_abandon(page,segments_tld);
mi_assert_internal(mi_page_is_abandoned(page));
_mi_arena_page_abandon(page,&pheap->tld);
}

// force abandon a page

@@ -411,8 +400,7 @@ void _mi_page_force_abandon(mi_page_t* page) {

// ensure this page is no longer in the heap delayed free list
_mi_heap_delayed_free_all(heap);
// We can still access the page meta-info even if it is freed as we ensure
// in `mi_segment_force_abandon` that the segment is not freed (yet)
// TODO: can we still access the page meta-info even if it is freed?
if (page->capacity == 0) return; // it may have been freed now

// and now unlink it from the page queue and abandon (or free)

@@ -433,17 +421,18 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
mi_assert_internal(mi_page_all_free(page));
mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING);

mi_heap_t* pheap = mi_page_heap(page);

// no more aligned blocks in here
mi_page_set_has_aligned(page, false);

// remove from the page list
// (no need to do _mi_heap_delayed_free first as all blocks are already free)
mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments;
mi_page_queue_remove(pq, page);

// and free it
mi_page_set_heap(page,NULL);
_mi_segment_page_free(page, force, segments_tld);
_mi_arena_page_free(page, force, &pheap->tld);
}

#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE

@@ -474,7 +463,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue?
if (pq->last==page && pq->first==page) { // the only page in the queue?
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
mi_heap_t* heap = mi_page_heap(page);
mi_assert_internal(pq >= heap->pages);
const size_t index = pq - heap->pages;

@@ -639,7 +628,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)

size_t page_size;
//uint8_t* page_start =
_mi_segment_page_start(_mi_page_segment(page), page, &page_size);
mi_page_area(page, &page_size);
mi_stat_counter_increase(tld->stats.pages_extended, 1);

// calculate the extend count

@@ -676,15 +665,13 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
// Initialize a fresh page
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) {
mi_assert(page != NULL);
mi_segment_t* segment = _mi_page_segment(page);
mi_assert(segment != NULL);
mi_assert_internal(block_size > 0);
// set fields
mi_page_set_heap(page, heap);
page->block_size = block_size;
size_t page_size;
page->page_start = _mi_segment_page_start(segment, page, &page_size);
mi_track_mem_noaccess(page->page_start,page_size);
uint8_t* page_start = mi_page_area(page, &page_size);
mi_track_mem_noaccess(page_start,page_size);
mi_assert_internal(page_size / block_size < (1L<<16));
page->reserved = (uint16_t)(page_size / block_size);
mi_assert_internal(page->reserved > 0);

@@ -692,15 +679,15 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
page->keys[0] = _mi_heap_random_next(heap);
page->keys[1] = _mi_heap_random_next(heap);
#endif
page->free_is_zero = page->is_zero_init;
page->free_is_zero = page->memid.initially_zero;
#if MI_DEBUG>2
if (page->is_zero_init) {
if (page->memid.initially_zero) {
mi_track_mem_defined(page->page_start, page_size);
mi_assert_expensive(mi_mem_is_zero(page->page_start, page_size));
mi_assert_expensive(mi_mem_is_zero(page_start, page_size));
}
#endif
if (block_size > 0 && _mi_is_power_of_two(block_size)) {
page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size));
page->block_size_shift = (uint8_t)mi_ctz(block_size);
}
else {
page->block_size_shift = 0;
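For illustration only (not part of this diff): `block_size_shift` is kept so that, for power-of-two block sizes, an index computation can use a shift instead of an integer division. A minimal standalone sketch with a hypothetical `block_index` helper, using gcc/clang `__builtin_ctzll` in place of `mi_ctz`:

#include <stdint.h>
#include <stdio.h>

// hypothetical helper: block index of an offset inside the page area
static size_t block_index(size_t offset, size_t block_size, unsigned block_size_shift) {
  return (block_size_shift != 0 ? (offset >> block_size_shift)  // power-of-two fast path
                                : (offset / block_size));       // general (division) path
}

int main(void) {
  const size_t block_size = 64;                             // power of two, so shift == 6
  const unsigned shift = (unsigned)__builtin_ctzll(block_size);
  printf("%zu\n", block_index(640, block_size, shift));     // prints 10
  printf("%zu\n", block_index(640, 48, 0));                 // prints 13 (48 is not a power of two)
  return 0;
}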
@@ -734,13 +721,6 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
// search for a best next page to use for at most N pages (often cut short if immediate blocks are available)
#define MI_MAX_CANDIDATE_SEARCH (8)

// is the page not yet used up to its reserved space?
static bool mi_page_is_expandable(const mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_internal(page->capacity <= page->reserved);
return (page->capacity < page->reserved);
}


// Find a page with free blocks of `page->block_size`.
static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)

@@ -907,7 +887,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
#if MI_HUGE_PAGE_ABANDON
mi_page_queue_t* pq = NULL;
#else
mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_OBJ_SIZE_MAX+1); // always in the huge queue regardless of the block size
mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_MAX_OBJ_SIZE+1); // always in the huge queue regardless of the block size
mi_assert_internal(mi_page_queue_is_huge(pq));
#endif
mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment);

@@ -915,10 +895,9 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
mi_assert_internal(mi_page_block_size(page) >= size);
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(mi_page_is_huge(page));
mi_assert_internal(_mi_page_segment(page)->page_kind == MI_PAGE_HUGE);
mi_assert_internal(_mi_page_segment(page)->used==1);
mi_assert_internal(mi_page_is_singleton(page));
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
mi_assert_internal(mi_page_is_abandoned(page));
mi_page_set_heap(page, NULL);
#endif
mi_heap_stat_increase(heap, huge, mi_page_block_size(page));

@@ -933,7 +912,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept {
// huge allocation?
const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size`
if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) {
if mi_unlikely(req_size > (MI_LARGE_MAX_OBJ_SIZE - MI_PADDING_SIZE) || huge_alignment > 0) {
if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) {
_mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
return NULL;


@@ -20,7 +20,7 @@ terms of the MIT license. A copy of the license can be found in the file
// containing the whole library. If it is linked first
// it will override all the standard library allocation
// functions (on Unix's).
#include "alloc.c" // includes alloc-override.c
#include "alloc.c" // includes alloc-override.c and free.c
#include "alloc-aligned.c"
#include "alloc-posix.c"
#include "arena.c"

@@ -31,6 +31,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include "options.c"
#include "os.c"
#include "page.c" // includes page-queue.c
#include "page-map.c"
#include "random.c"
#include "segment.c"
#include "segment-map.c"
src/xbitmap.c (599 lines removed)

@@ -1,599 +0,0 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */

#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/bits.h"
#include "xbitmap.h"

/* --------------------------------------------------------------------------------
bfields
-------------------------------------------------------------------------------- */

static inline size_t mi_bfield_ctz(mi_bfield_t x) {
return mi_ctz(x);
}

static inline size_t mi_bfield_clz(mi_bfield_t x) {
return mi_clz(x);
}

// find the least significant bit that is set (i.e. count trailing zeros)
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) {
return mi_bsf(x,idx);
}

static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) {
return mi_rotr(x,r);
}

// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0).
static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal(idx < MI_BFIELD_BITS);
const mi_bfield_t mask = ((mi_bfield_t)1)<<idx;
if (set) {
const mi_bfield_t old = mi_atomic(fetch_or_explicit)(b, mask, mi_memory_order(acq_rel));
return ((old&mask) == 0);
}
else {
mi_bfield_t old = mi_atomic(fetch_and_explicit)(b, ~mask, mi_memory_order(acq_rel));
return ((old&mask) == mask);
}
}
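A minimal standalone sketch (plain C11 atomics rather than the `mi_atomic` wrappers; not part of this file) of the fetch-or idiom above: the returned old value tells the caller whether it was the one that flipped the bit.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool atomic_set_bit(_Atomic(size_t)* b, size_t idx) {  // true iff the bit went 0 -> 1
  const size_t mask = (size_t)1 << idx;
  const size_t old = atomic_fetch_or_explicit(b, mask, memory_order_acq_rel);
  return ((old & mask) == 0);
}

int main(void) {
  _Atomic(size_t) field = 0;
  printf("%d\n", atomic_set_bit(&field, 3));  // prints 1: this call set the bit
  printf("%d\n", atomic_set_bit(&field, 3));  // prints 0: the bit was already set
  return 0;
}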
// Set/clear a mask of bits atomically, and return true if the mask bits transitioned from all 0's to 1's (or all 1's to 0's).
// `already_xset` is true if all bits for the mask were already set/cleared.
static bool mi_bfield_atomic_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_xset) {
mi_assert_internal(mask != 0);
if (set) {
mi_bfield_t old = *b;
while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits until success
*already_xset = ((old&mask) == mask);
return ((old&mask) == 0);
}
else { // clear
mi_bfield_t old = *b;
while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits until success
*already_xset = ((old&mask) == 0);
return ((old&mask) == mask);
}
}
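The same pattern with plain C11 atomics, as a standalone sketch (not part of this file): a weak-CAS loop ORs a whole mask in, and the pre-image distinguishes "all bits transitioned" from "some were already set".

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool atomic_set_mask(_Atomic(size_t)* b, size_t mask, bool* already_set) {
  size_t old = atomic_load_explicit(b, memory_order_relaxed);
  while (!atomic_compare_exchange_weak_explicit(b, &old, old | mask,
                                                memory_order_acq_rel, memory_order_relaxed)) {
    // a failed CAS reloads `old`; just retry
  }
  *already_set = ((old & mask) == mask);
  return ((old & mask) == 0);   // true iff every mask bit transitioned 0 -> 1
}

int main(void) {
  _Atomic(size_t) field = 0x0F;
  bool already;
  printf("%d\n", atomic_set_mask(&field, 0xF0, &already));  // prints 1 (0x0F -> 0xFF)
  printf("%d\n", (int)already);                             // prints 0
  return 0;
}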
// Tries to set/clear a bit atomically, and returns true if the bit atomically transitioned from 0 to 1 (or 1 to 0)
static bool mi_bfield_atomic_try_xset( mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal(idx < MI_BFIELD_BITS);
// for a single bit, we can always just set/clear and test afterwards if it was actually us that changed it first
return mi_bfield_atomic_xset(set, b, idx);
}


// Tries to (un)set a mask atomically, and returns true if the mask bits atomically transitioned from 0 to mask (or mask to 0)
// and false otherwise (leaving the bit field as is).
static bool mi_bfield_atomic_try_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask ) {
mi_assert_internal(mask != 0);
if (set) {
mi_bfield_t old = *b;
do {
if ((old&mask) != 0) return false; // the mask bits are no longer 0
} while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits
return true;
}
else { // clear
mi_bfield_t old = *b;
do {
if ((old&mask) != mask) return false; // the mask bits are no longer set
} while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits
return true;
}
}

// Check if all bits corresponding to a mask are set/cleared.
static bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
mi_assert_internal(mask != 0);
if (set) {
return ((*b & mask) == mask);
}
else {
return ((*b & mask) == 0);
}
}

// Tries to set/clear a byte atomically, and returns true if the byte atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise (leaving the bit field as is).
static bool mi_bfield_atomic_try_xset8(mi_bit_t set, _Atomic(mi_bfield_t)* b, size_t byte_idx ) {
mi_assert_internal(byte_idx < MI_BFIELD_SIZE);
const mi_bfield_t mask = ((mi_bfield_t)0xFF)<<(byte_idx*8);
return mi_bfield_atomic_try_xset_mask(set,b,mask);
}


/* --------------------------------------------------------------------------------
bitmap chunks
-------------------------------------------------------------------------------- */

static bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx ) {
mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS;
return mi_bfield_atomic_try_xset( set, &chunk->bfields[i], idx);
}

static bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx ) {
mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS);
const size_t i = byte_idx / MI_BFIELD_SIZE;
const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE;
return mi_bfield_atomic_try_xset8( set, &chunk->bfields[i], ibyte_idx);
}

// Set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0)
static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* palready_xset) {
mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
mi_assert_internal(n>0);
bool all_transition = true;
bool all_already_xset = true;
size_t idx = cidx % MI_BFIELD_BITS;
size_t field = cidx / MI_BFIELD_BITS;
while (n > 0) {
size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
if (m > n) { m = n; }
mi_assert_internal(idx + m <= MI_BFIELD_BITS);
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
bool already_xset;
all_transition = all_transition && mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
all_already_xset = all_already_xset && already_xset;
// next field
field++;
idx = 0;
n -= m;
}
*palready_xset = all_already_xset;
return all_transition;
}

// Check if a sequence of `n` bits within a chunk are all set/cleared.
static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
mi_assert_internal(n>0);
bool all_xset = true;
size_t idx = cidx % MI_BFIELD_BITS;
size_t field = cidx / MI_BFIELD_BITS;
while (n > 0) {
size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
if (m > n) { m = n; }
mi_assert_internal(idx + m <= MI_BFIELD_BITS);
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
all_xset = all_xset && mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[field], mask);
// next field
field++;
idx = 0;
n -= m;
}
return all_xset;
}

// Try to atomically set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0),
// and false otherwise leaving all bit fields as is.
static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS);
mi_assert_internal(n>0);
if (n==0) return true;
size_t start_idx = cidx % MI_BFIELD_BITS;
size_t start_field = cidx / MI_BFIELD_BITS;
size_t end_field = MI_BITMAP_CHUNK_FIELDS;
size_t mask_mid = 0;
size_t mask_end = 0;

// first field
size_t field = start_field;
size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field
if (m > n) { m = n; }
mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS);
const size_t mask_start = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << start_idx);
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start)) return false;

// done?
n -= m;
if (n==0) return true;

// continue with mid fields and last field: if these fail we need to recover by unsetting previous fields

// mid fields
while (n >= MI_BFIELD_BITS) {
field++;
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
mask_mid = ~MI_ZU(0);
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore;
n -= MI_BFIELD_BITS;
}

// last field
if (n > 0) {
mi_assert_internal(n < MI_BFIELD_BITS);
field++;
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
end_field = field;
mask_end = (MI_ZU(1)<<n)-1;
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_end)) goto restore;
}

return true;

restore:
// field is on the field that failed to set atomically; we need to restore all previous fields
mi_assert_internal(field > start_field);
while( field > start_field) {
field--;
const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid));
bool already_xset;
mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, &already_xset);
}
return false;
}
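A reduced standalone sketch (C11 atomics, hypothetical helper names; not part of this file) of the try-then-restore pattern above, with just two fields: claim the first mask, claim the second, and roll the first back if the second claim fails so both fields are left unchanged on failure.

#include <stdatomic.h>
#include <stdbool.h>

static bool try_set_mask(_Atomic(size_t)* b, size_t mask) {   // succeeds only if all mask bits were 0
  size_t old = atomic_load_explicit(b, memory_order_relaxed);
  do {
    if ((old & mask) != 0) return false;
  } while (!atomic_compare_exchange_weak_explicit(b, &old, old | mask,
                                                  memory_order_acq_rel, memory_order_relaxed));
  return true;
}

static void clear_mask(_Atomic(size_t)* b, size_t mask) {
  atomic_fetch_and_explicit(b, ~mask, memory_order_acq_rel);
}

// claim a range that spans two adjacent fields, or leave both untouched
static bool try_set_across(_Atomic(size_t)* f0, size_t mask0,
                           _Atomic(size_t)* f1, size_t mask1) {
  if (!try_set_mask(f0, mask0)) return false;
  if (!try_set_mask(f1, mask1)) {
    clear_mask(f0, mask0);   // restore the part that was already claimed
    return false;
  }
  return true;
}

int main(void) {
  _Atomic(size_t) f0 = 0, f1 = 0;
  return try_set_across(&f0, (size_t)0xF0, &f1, (size_t)0x0F) ? 0 : 1;
}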
// find least 1-bit in a chunk and try to unset it atomically
// set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
// todo: try neon version
static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
while(true) {
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
if (_mm256_testz_si256(vec,vec)) return false; // vec == 0 ?
const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // (elem64 == 0 ? -1 : 0)
const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits in the mask will be all 1 or all 0)
mi_assert_internal(mask != 0);
const size_t chunk_idx = _tzcnt_u32(mask) / 8; // tzcnt == 0, 8, 16, or 24
mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
size_t cidx;
if (mi_bfield_find_least_bit(chunk->bfields[chunk_idx],&cidx)) { // find the bit that is set
if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[chunk_idx], cidx)) { // unset atomically
*pidx = (chunk_idx*MI_BFIELD_BITS) + cidx;
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
return true;
}
}
// try again
}
#else
size_t idx;
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
size_t idx;
if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i],&idx)) { // find least 1-bit
if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[i],idx)) { // try unset atomically
*pidx = (i*MI_BFIELD_BITS + idx);
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
return true;
}
}
}
return false;
#endif
}
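For the AVX2 path, a standalone sketch (assumes x86-64 with AVX2, compile with -mavx2; uses gcc/clang __builtin_ctz instead of _tzcnt_u32; not part of this file) of the movemask trick: locate the first nonzero 64-bit lane of a 256-bit chunk.

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

static int first_nonzero_lane(const uint64_t v[4]) {
  const __m256i vec = _mm256_loadu_si256((const __m256i*)v);
  if (_mm256_testz_si256(vec, vec)) return -1;                          // all four lanes are zero
  const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // all-ones per zero lane
  const uint32_t nonzero = ~(uint32_t)_mm256_movemask_epi8(vcmp);       // 8 bits per nonzero lane
  return (int)(__builtin_ctz(nonzero) / 8);                             // byte index / 8 = lane index
}

int main(void) {
  uint64_t chunk[4] = { 0, 0, 0x10, 0 };
  printf("%d\n", first_nonzero_lane(chunk));   // prints 2
  return 0;
}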
// find least byte in a chunk with all bits set, and try unset it atomically
// set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
// todo: try neon version
static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, size_t* pidx) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
while(true) {
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0)
const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte
if (mask == 0) return false;
const size_t i = _tzcnt_u32(mask);
mi_assert_internal(8*i < MI_BITMAP_CHUNK_BITS);
const size_t chunk_idx = i / MI_BFIELD_SIZE;
const size_t byte_idx = i % MI_BFIELD_SIZE;
if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[chunk_idx],byte_idx)) { // try to unset atomically
*pidx = (chunk_idx*MI_BFIELD_BITS) + (byte_idx*8);
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
return true;
}
// try again
}
#else
size_t idx;
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
const mi_bfield_t x = chunk->bfields[i];
// has_set8 has low bit in each byte set if the byte in x == 0xFF
const mi_bfield_t has_set8 = ((~x - MI_BFIELD_LO_BIT8) & // high bit set if byte in x is 0xFF or < 0x7F
(x & MI_BFIELD_HI_BIT8)) // high bit set if byte in x is >= 0x80
>> 7; // shift high bit to low bit
size_t idx;
if mi_unlikely(mi_bfield_find_least_bit(has_set8,&idx)) { // find least 1-bit
mi_assert_internal(idx <= (MI_BFIELD_BITS - 8));
mi_assert_internal((idx%8)==0);
const size_t byte_idx = idx/8;
if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[i],byte_idx)) { // unset the byte atomically
*pidx = (i*MI_BFIELD_BITS) + idx;
mi_assert_internal(*pidx + 8 <= MI_BITMAP_CHUNK_BITS);
return true;
}
// else continue
}
}
return false;
#endif
}
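The `has_set8` expression is a SWAR trick; here is a standalone sketch (gcc/clang builtins; not part of this file) that flags bytes equal to 0xFF and takes the least-significant flag, which is always exact (flags above the first 0xFF byte can be spurious because the subtraction borrows only across 0xFF bytes).

#include <stdint.h>
#include <stdio.h>

#define LO_BIT8 ((~(uint64_t)0) / 0xFF)   // 0x0101...01
#define HI_BIT8 (LO_BIT8 << 7)            // 0x8080...80

// byte index (from the least significant byte) of the lowest 0xFF byte in x, or -1 if none
static int lowest_full_byte(uint64_t x) {
  const uint64_t flags = ((~x - LO_BIT8) & x & HI_BIT8) >> 7;  // low bit per byte: candidate 0xFF byte
  if (flags == 0) return -1;
  return (int)(__builtin_ctzll(flags) / 8);                    // the least significant flag is never spurious
}

int main(void) {
  printf("%d\n", lowest_full_byte(0x00FF000012FF3400ULL));  // prints 2
  printf("%d\n", lowest_full_byte(0x1234567890ABCDEFULL));  // prints -1
  return 0;
}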
// find a sequence of `n` bits in a chunk with all `n` bits set, and try unset it atomically
// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
// todo: try avx2 and neon version
// todo: allow spanning across bfield boundaries?
static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
if (n == 0 || n > MI_BFIELD_BITS) return false; // TODO: allow larger?
const mi_bfield_t mask = (n==MI_BFIELD_BITS ? ~((mi_bfield_t)0) : (((mi_bfield_t)1) << n)-1);
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
mi_bfield_t b = chunk->bfields[i];
size_t bshift = 0;
size_t idx;
while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
b >>= idx;
bshift += idx;
if (bshift + n >= MI_BFIELD_BITS) break;

if ((b&mask) == mask) { // found a match
mi_assert_internal( ((mask << bshift) >> bshift) == mask );
if mi_likely(mi_bfield_atomic_try_xset_mask(MI_BIT_CLEAR,&chunk->bfields[i],mask<<bshift)) {
*pidx = (i*MI_BFIELD_BITS) + bshift;
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
mi_assert_internal(*pidx + n <= MI_BITMAP_CHUNK_BITS);
return true;
}
else {
// if failed to atomically commit, try again from this position
b = (chunk->bfields[i] >> bshift);
}
}
else {
// advance
const size_t ones = mi_bfield_ctz(~b); // skip all ones (since it didn't fit the mask)
mi_assert_internal(ones>0);
bshift += ones;
b >>= ones;
}
}
}
return false;
}
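A standalone scalar sketch (gcc/clang builtins; not part of this file) of the scan loop above: skip zeros with count-trailing-zeros, test a run of `n` ones against a mask, and otherwise skip the too-short run of ones.

#include <stdint.h>
#include <stdio.h>

// lowest bit position of n consecutive 1-bits in b (1 <= n <= 64), or -1 if there is none
static int find_run_of_ones(uint64_t b, int n) {
  const uint64_t mask = (n == 64 ? ~UINT64_C(0) : (UINT64_C(1) << n) - 1);
  int shift = 0;
  while (b != 0) {
    const int zeros = __builtin_ctzll(b);       // skip zeros up to the next 1-bit
    b >>= zeros; shift += zeros;
    if (shift + n > 64) return -1;              // a run of n can no longer fit
    if ((b & mask) == mask) return shift;       // found n consecutive ones at `shift`
    const int ones = __builtin_ctzll(~b);       // the run was too short: skip it
    b >>= ones; shift += ones;
  }
  return -1;
}

int main(void) {
  printf("%d\n", find_run_of_ones(0xF0F0, 4));  // prints 4
  printf("%d\n", find_run_of_ones(0xF0F0, 5));  // prints -1
  return 0;
}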
// are all bits in a bitmap chunk set?
static bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
return _mm256_test_all_ones(vec);
#else
// written like this for vectorization
mi_bfield_t x = chunk->bfields[0];
for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
x = x & chunk->bfields[i];
}
return (~x == 0);
#endif
}

// are all bits in a bitmap chunk clear?
static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
return _mm256_testz_si256( vec, vec );
#else
// written like this for vectorization
mi_bfield_t x = chunk->bfields[0];
for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
x = x | chunk->bfields[i];
}
return (x == 0);
#endif
}

/* --------------------------------------------------------------------------------
bitmap
-------------------------------------------------------------------------------- */
// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) {
if (!already_zero) {
_mi_memzero_aligned(bitmap, sizeof(*bitmap));
}
}

// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
mi_assert_internal(n>0);
mi_assert_internal(idx + n<=MI_BITMAP_MAX_BITS);

// first chunk
size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
size_t m = MI_BITMAP_CHUNK_BITS - cidx;
if (m > n) { m = n; }
bool already_xset;
mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m, &already_xset);

// n can be large so use memset for efficiency for all in-between chunks
chunk_idx++;
n -= m;
const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS;
if (mid_chunks > 0) {
_mi_memset(&bitmap->chunks[chunk_idx], (set ? ~0 : 0), MI_BITMAP_CHUNK_BITS/8);
chunk_idx += mid_chunks;
n -= mid_chunks * MI_BITMAP_CHUNK_BITS;
}

// last chunk
if (n > 0) {
mi_assert_internal(n < MI_BITMAP_CHUNK_BITS);
mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], 0, n, &already_xset);
}
}


// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0),
// and false otherwise leaving the bitmask as is.
bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
return mi_bitmap_chunk_try_xset( set, &bitmap->chunks[chunk_idx], cidx);
}

// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is.
bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
mi_assert_internal(idx%8 == 0);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t byte_idx = (idx % MI_BITMAP_CHUNK_BITS)/8;
return mi_bitmap_chunk_try_xset8( set, &bitmap->chunks[chunk_idx],byte_idx);
}

// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
// and false otherwise leaving the bitmask as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
mi_assert_internal(n>0);
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
if (n==1) { return mi_bitmap_try_xset(set,bitmap,idx); }
if (n==8) { return mi_bitmap_try_xset8(set,bitmap,idx); }

mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
return mi_bitmap_chunk_try_xsetN( set, &bitmap->chunks[chunk_idx], cidx, n);
}

// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset) {
mi_assert_internal(n>0);
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
bool local_already_xset;
if (already_xset==NULL) { already_xset = &local_already_xset; }
// if (n==1) { return mi_bitmap_xset(set, bitmap, idx); }
// if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); }

mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
return mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n, already_xset);
}

// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
mi_assert_internal(n>0);
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
}


#define mi_bitmap_forall_set_chunks(bitmap,start,decl_chunk_idx) \
{ size_t _set_idx; \
size_t _start = start % MI_BFIELD_BITS; \
mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \
while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \
decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS;

#define mi_bitmap_forall_set_chunks_end() \
_start += _set_idx+1; /* so chunk_idx stays valid */ \
_any_set >>= _set_idx; /* skip scanned bits (and avoid UB with (idx+1)) */ \
_any_set >>= 1; \
} \
}
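A standalone sketch (gcc/clang builtins; not part of this file) of what these macros do: rotate the `any_set` summary word so the scan starts at a caller-chosen chunk (spreading threads over different chunks), then visit each set bit and map it back to its original position.

#include <stdint.h>
#include <stdio.h>

static uint64_t rotr64(uint64_t x, unsigned r) {
  r &= 63;
  return (r == 0 ? x : ((x >> r) | (x << (64 - r))));
}

// visit the set bits of `any_set`, starting at bit `start` and wrapping around
static void visit_set_bits_from(uint64_t any_set, unsigned start) {
  const unsigned s = start & 63;
  uint64_t rot = rotr64(any_set, s);
  unsigned base = s;
  while (rot != 0) {
    const unsigned idx = (unsigned)__builtin_ctzll(rot);
    printf("chunk %u\n", (idx + base) & 63);   // map back to the original bit position
    base += idx + 1;
    rot >>= idx; rot >>= 1;                    // two shifts avoid UB when idx+1 == 64
  }
}

int main(void) {
  visit_set_bits_from(0x8000000000000005ULL, 2);  // prints chunks 2, 63, 0 (in that order)
  return 0;
}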
// Find a set bit in a bitmap and atomically unset it. Returns true on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// The low `MI_BFIELD_BITS` of start are used to set the start point of the search
// (to reduce thread contention).
bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start) {
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx < MI_BITMAP_MAX_BITS);
return true;
}
else {
// we may find that all are unset only on a second iteration but that is ok as
// _any_set is a conservative approximation.
if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
}
}
}
mi_bitmap_forall_set_chunks_end();
return false;
}


// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ) {
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-8);
mi_assert_internal((*pidx % 8) == 0);
return true;
}
else {
if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
}
}
}
mi_bitmap_forall_set_chunks_end();
return false;
}

// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ) {
// TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger
// TODO: allow spanning across chunk boundaries
if (n == 0 || n > MI_BFIELD_BITS) return false;
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-n);
return true;
}
else {
if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
}
}
}
mi_bitmap_forall_set_chunks_end();
return false;
}
src/xbitmap.h (94 lines removed)

@@ -1,94 +0,0 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_XBITMAP_H
#define MI_XBITMAP_H

/* --------------------------------------------------------------------------------
Definitions
-------------------------------------------------------------------------------- */

typedef size_t mi_bfield_t;

#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3)
#define MI_BFIELD_BITS (1 << MI_BFIELD_BITS_SHIFT)
#define MI_BFIELD_SIZE (MI_BFIELD_BITS/8)
#define MI_BFIELD_BITS_MOD_MASK (MI_BFIELD_BITS - 1)
#define MI_BFIELD_LO_BIT8 ((~(mi_bfield_t(0)))/0xFF) // 0x01010101 ..
#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 ..

#define MI_BITMAP_CHUNK_BITS_SHIFT (8) // 2^8 = 256 bits per chunk
#define MI_BITMAP_CHUNK_BITS (1 << MI_BITMAP_CHUNK_BITS_SHIFT)
#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1)

typedef mi_decl_align(32) struct mi_bitmap_chunk_s {
_Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
} mi_bitmap_chunk_t;


typedef mi_decl_align(32) struct mi_bitmap_s {
mi_bitmap_chunk_t chunks[MI_BFIELD_BITS];
_Atomic(mi_bfield_t)any_set;
} mi_bitmap_t;

#define MI_BITMAP_MAX_BITS (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS) // 16k bits on 64bit, 8k bits on 32bit
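As a quick check of that comment: with a 64-bit `size_t`, MI_BFIELD_BITS is 64 and MI_BITMAP_CHUNK_BITS is 256, so MI_BITMAP_MAX_BITS = 64 * 256 = 16384 bits; with a 32-bit `size_t` it is 32 * 256 = 8192 bits. On 64-bit the structure itself is 64 chunks of 4 * 8 = 32 bytes each, about 2 KiB, plus the `any_set` summary field.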
/* --------------------------------------------------------------------------------
Bitmap
-------------------------------------------------------------------------------- */

typedef bool mi_bit_t;
#define MI_BIT_SET (true)
#define MI_BIT_CLEAR (false)

// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero);

// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
// If `already_xset` is not NULL, it is set to true if all the bits were already all set/cleared.
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset);

// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
// and false otherwise leaving the bitmask as is.
mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);

// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is.
mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);

// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
// and false otherwise leaving the bitmask as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);

// Find a set bit in a bitmap and atomically unset it. Returns true on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// The low `MI_BFIELD_BITS` of start are used to set the start point of the search
// (to reduce thread contention).
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start);

// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx );

// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx );

#endif // MI_XBITMAP_H