wip: further progress on removing segments

daanx 2024-11-29 10:40:18 -08:00
parent 71cfa45e76
commit 441d4fed9f
21 changed files with 2396 additions and 2492 deletions


@@ -82,7 +82,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_
#if (MI_STAT>0)
const size_t bsize = mi_page_usable_block_size(page);
- if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
+ if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
mi_heap_stat_increase(heap, normal, bsize);
mi_heap_stat_counter_increase(heap, normal_count, 1);
#if (MI_STAT>1)

(File diff suppressed because it is too large.)

(File diff suppressed because it is too large.)

src/bitmap-old.c (new file, 419 lines)

@@ -0,0 +1,419 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represented as an array of fields where each field is a machine word (`size_t`)
There are two APIs: the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
The `_across` postfixed functions do allow sequences that can cross over
between the fields. (This is used in arena allocation)
---------------------------------------------------------------------------- */
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/bits.h"
#include "bitmap.h"
/* -----------------------------------------------------------
Bitmap definition
----------------------------------------------------------- */
// The bit mask for a given number of blocks at a specified bit index.
static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) {
mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
mi_assert_internal(count > 0);
if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
if (count == 0) return 0;
return ((((size_t)1 << count) - 1) << bitidx);
}
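For intuition, a small worked example (illustrative only, assuming a 64-bit `size_t`; not part of this commit):

  mi_bitmap_mask_(4, 8)   // == 0x0000000000000F00 : four 1-bits shifted to bit index 8
  mi_bitmap_mask_(64, 0)  // == MI_BITMAP_FIELD_FULL : count >= MI_BITMAP_FIELD_BITS saturates to all ones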
/* -----------------------------------------------------------
Claim a bit sequence atomically
----------------------------------------------------------- */
// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
mi_assert_internal(bitmap_idx != NULL);
mi_assert_internal(count <= MI_BITMAP_FIELD_BITS);
mi_bitmap_field_t* field = &bitmap[idx];
size_t map = mi_atomic_load_relaxed(field);
if (map==MI_BITMAP_FIELD_FULL) return false; // short cut
// search for 0-bit sequence of length count
const size_t mask = mi_bitmap_mask_(count, 0);
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
#if MI_HAS_FAST_BITSCAN
size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible
#else
size_t bitidx = 0; // otherwise start at 0
#endif
size_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx
// scan linearly for a free range of zero bits
while (bitidx <= bitidx_max) {
const size_t mapm = (map & m);
if (mapm == 0) { // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
const size_t newmap = (map | m);
mi_assert_internal((newmap^map) >> bitidx == mask);
if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here?
// no success, another thread claimed concurrently.. keep going (with updated `map`)
continue;
}
else {
// success, we claimed the bits!
*bitmap_idx = mi_bitmap_index_create(idx, bitidx);
return true;
}
}
else {
// on to the next bit range
#if MI_HAS_FAST_BITSCAN
mi_assert_internal(mapm != 0);
const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
mi_assert_internal(shift > 0 && shift <= count);
#else
const size_t shift = 1;
#endif
bitidx += shift;
m <<= shift;
}
}
// no bits found
return false;
}
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
size_t idx = start_field_idx;
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
if (idx >= bitmap_fields) { idx = 0; } // wrap
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
return true;
}
}
return false;
}
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
// mi_assert_internal((bitmap[idx] & mask) == mask);
const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
return ((prev & mask) == mask);
}
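A minimal usage sketch of the claim/unclaim pair (hypothetical caller; `bitmap` and `nfields` are assumed names, not part of this commit):

  mi_bitmap_index_t bitmap_idx;
  if (_mi_bitmap_try_find_from_claim(bitmap, nfields, 0, 4, &bitmap_idx)) {
    // the 4 bits at `bitmap_idx` are now exclusively owned by this thread
    bool all_were_one = _mi_bitmap_unclaim(bitmap, nfields, 4, bitmap_idx);  // release them again
    mi_assert_internal(all_were_one);
  }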
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
//mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); }
return ((prev & mask) == 0);
}
// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one.
static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
const size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
if (any_ones != NULL) { *any_ones = ((field & mask) != 0); }
return ((field & mask) == mask);
}
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
// Returns `true` if successful when all previous `count` bits were 0.
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
size_t expected = mi_atomic_load_relaxed(&bitmap[idx]);
do {
if ((expected & mask) != 0) return false;
}
while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask));
mi_assert_internal((expected & mask) == 0);
return true;
}
bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL);
}
bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
bool any_ones;
mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
return any_ones;
}
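To make the out-parameter semantics concrete (illustrative): if the `count`=4 bits at `bitmap_idx` currently read `0110`, then `_mi_bitmap_claim` returns false (not all bits were 0) while setting `*any_zero` to true (the call did turn at least one 0 into a 1); afterwards `_mi_bitmap_is_claimed` returns true, and `_mi_bitmap_is_any_claimed` would have returned true even before the claim.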
//--------------------------------------------------------------------------
// the `_across` functions work on bitmaps where sequences can cross over
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------
// Try to atomically claim a sequence of `count` bits starting from the field
// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
{
mi_assert_internal(bitmap_idx != NULL);
// check initial trailing zeros
mi_bitmap_field_t* field = &bitmap[idx];
size_t map = mi_atomic_load_relaxed(field);
const size_t initial = mi_clz(map); // count of initial zeros starting at idx
mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
if (initial == 0) return false;
if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us)
if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
// scan ahead
size_t found = initial;
size_t mask = 0; // mask bits for the final field
while(found < count) {
field++;
map = mi_atomic_load_relaxed(field);
const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS);
mask = mi_bitmap_mask_(mask_bits, 0);
if ((map & mask) != 0) return false; // some part is already claimed
found += mask_bits;
}
mi_assert_internal(field < &bitmap[bitmap_fields]);
// we found a range of contiguous zeros up to the final field; mask contains mask in the final field
// now try to claim the range atomically
mi_bitmap_field_t* const final_field = field;
const size_t final_mask = mask;
mi_bitmap_field_t* const initial_field = &bitmap[idx];
const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial;
const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx);
// initial field
size_t newmap;
field = initial_field;
map = mi_atomic_load_relaxed(field);
do {
newmap = (map | initial_mask);
if ((map & initial_mask) != 0) { goto rollback; };
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
// intermediate fields
while (++field < final_field) {
newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
map = 0;
if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; }
}
// final field
mi_assert_internal(field == final_field);
map = mi_atomic_load_relaxed(field);
do {
newmap = (map | final_mask);
if ((map & final_mask) != 0) { goto rollback; }
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
// claimed!
mi_stat_counter_increase(stats->arena_crossover_count,1);
*bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
return true;
rollback:
// roll back intermediate fields
// (we just failed to claim `field` so decrement first)
while (--field > initial_field) {
newmap = 0;
map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
mi_assert_internal(mi_atomic_load_relaxed(field) == map);
mi_atomic_store_release(field, newmap);
}
if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`)
map = mi_atomic_load_relaxed(field);
do {
mi_assert_internal((map & initial_mask) == initial_mask);
newmap = (map & ~initial_mask);
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
}
mi_stat_counter_increase(stats->arena_rollback_count,1);
// retry? (we make a recursive call instead of goto to be able to use const declarations)
if (retries <= 2) {
return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx, stats);
}
else {
return false;
}
}
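A worked layout for such a claim (illustrative, assuming 64-bit fields): for count = 150 when the top 10 bits of field `idx` are free (initial == 10), the claim splits as

  initial_mask = mi_bitmap_mask_(10, 54)   // top 10 bits of bitmap[idx] (initial_idx == 64 - 10)
  two intermediate fields, 64 bits each    // 10 + 128 == 138 bits so far
  final_mask   = mi_bitmap_mask_(12, 0)    // the remaining 150 - 138 == 12 low bits

and on failure the rollback path clears exactly these masks again in reverse order.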
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) {
mi_assert_internal(count > 0);
if (count <= 2) {
// we don't bother with crossover fields for small counts
return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
}
// visit the fields
size_t idx = start_field_idx;
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
if (idx >= bitmap_fields) { idx = 0; } // wrap
// first try to claim inside a field
/*
if (count <= MI_BITMAP_FIELD_BITS) {
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
return true;
}
}
*/
// if that fails, then try to claim across fields
if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx, stats)) {
return true;
}
}
return false;
}
// Helper for masks across fields; returns the mid count, post_mask may be 0
static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) {
MI_UNUSED(bitmap_fields);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) {
*pre_mask = mi_bitmap_mask_(count, bitidx);
*mid_mask = 0;
*post_mask = 0;
mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields);
return 0;
}
else {
const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx;
mi_assert_internal(pre_bits < count);
*pre_mask = mi_bitmap_mask_(pre_bits, bitidx);
count -= pre_bits;
const size_t mid_count = (count / MI_BITMAP_FIELD_BITS);
*mid_mask = MI_BITMAP_FIELD_FULL;
count %= MI_BITMAP_FIELD_BITS;
*post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0));
mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields);
return mid_count;
}
}
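Worked example (illustrative, 64-bit fields): for bitidx == 60 and count == 72 the helper yields

  *pre_mask  = mi_bitmap_mask_(4, 60);   // pre_bits = 64 - 60 = 4
  *mid_mask  = MI_BITMAP_FIELD_FULL;     // mid count = (72 - 4) / 64 = 1 full field
  *post_mask = mi_bitmap_mask_(4, 0);    // (72 - 4) % 64 = 4 trailing bits

so the `_across` operations below touch 4 + 64 + 4 bits in three consecutive fields.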
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
size_t idx = mi_bitmap_index_field(bitmap_idx);
size_t pre_mask;
size_t mid_mask;
size_t post_mask;
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_one = true;
mi_bitmap_field_t* field = &bitmap[idx];
size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part
if ((prev & pre_mask) != pre_mask) all_one = false;
while(mid_count-- > 0) {
prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part
if ((prev & mid_mask) != mid_mask) all_one = false;
}
if (post_mask!=0) {
prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part
if ((prev & post_mask) != post_mask) all_one = false;
}
return all_one;
}
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
size_t idx = mi_bitmap_index_field(bitmap_idx);
size_t pre_mask;
size_t mid_mask;
size_t post_mask;
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_zero = true;
bool any_zero = false;
_Atomic(size_t)*field = &bitmap[idx];
size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
if ((prev & pre_mask) != 0) all_zero = false;
if ((prev & pre_mask) != pre_mask) any_zero = true;
while (mid_count-- > 0) {
prev = mi_atomic_or_acq_rel(field++, mid_mask);
if ((prev & mid_mask) != 0) all_zero = false;
if ((prev & mid_mask) != mid_mask) any_zero = true;
}
if (post_mask!=0) {
prev = mi_atomic_or_acq_rel(field, post_mask);
if ((prev & post_mask) != 0) all_zero = false;
if ((prev & post_mask) != post_mask) any_zero = true;
}
if (pany_zero != NULL) { *pany_zero = any_zero; }
return all_zero;
}
// Returns `true` if all `count` bits were 1.
// `any_ones` is `true` if there was at least one bit set to one.
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
size_t idx = mi_bitmap_index_field(bitmap_idx);
size_t pre_mask;
size_t mid_mask;
size_t post_mask;
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_ones = true;
bool any_ones = false;
mi_bitmap_field_t* field = &bitmap[idx];
size_t prev = mi_atomic_load_relaxed(field++);
if ((prev & pre_mask) != pre_mask) all_ones = false;
if ((prev & pre_mask) != 0) any_ones = true;
while (mid_count-- > 0) {
prev = mi_atomic_load_relaxed(field++);
if ((prev & mid_mask) != mid_mask) all_ones = false;
if ((prev & mid_mask) != 0) any_ones = true;
}
if (post_mask!=0) {
prev = mi_atomic_load_relaxed(field);
if ((prev & post_mask) != post_mask) all_ones = false;
if ((prev & post_mask) != 0) any_ones = true;
}
if (pany_ones != NULL) { *pany_ones = any_ones; }
return all_ones;
}
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL);
}
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
bool any_ones;
mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
return any_ones;
}

src/bitmap-old.h (new file, 110 lines)

@@ -0,0 +1,110 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represented as an array of fields where each field is a machine word (`size_t`)
There are two APIs: the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
(this is used in region allocation)
The `_across` postfixed functions do allow sequences that can cross over
between the fields. (This is used in arena allocation)
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITMAP_H
#define MI_BITMAP_H
/* -----------------------------------------------------------
Bitmap definition
----------------------------------------------------------- */
#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE)
#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set
// An atomic bitmap of `size_t` fields
typedef _Atomic(size_t) mi_bitmap_field_t;
typedef mi_bitmap_field_t* mi_bitmap_t;
// A bitmap index is the index of the bit in a bitmap.
typedef size_t mi_bitmap_index_t;
// Create a bit index.
static inline mi_bitmap_index_t mi_bitmap_index_create_ex(size_t idx, size_t bitidx) {
mi_assert_internal(bitidx <= MI_BITMAP_FIELD_BITS);
return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
}
static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
return mi_bitmap_index_create_ex(idx,bitidx);
}
// Get the field index from a bit index.
static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx / MI_BITMAP_FIELD_BITS);
}
// Get the bit index in a bitmap field
static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx % MI_BITMAP_FIELD_BITS);
}
// Get the full bit index
static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
return bitmap_idx;
}
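A round-trip sketch of the index encoding (illustrative, assuming 64-bit fields):

  mi_bitmap_index_t bi = mi_bitmap_index_create(2, 5);  // == 2*64 + 5 == 133
  mi_bitmap_index_field(bi);         // == 2
  mi_bitmap_index_bit_in_field(bi);  // == 5
  mi_bitmap_index_bit(bi);           // == 133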
/* -----------------------------------------------------------
Claim a bit sequence atomically
----------------------------------------------------------- */
// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
// Returns `true` if successful when all previous `count` bits were 0.
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero);
bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
//--------------------------------------------------------------------------
// the `_across` functions work on bitmaps where sequences can cross over
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats);
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
#endif


@@ -1,19 +1,12 @@
/* ----------------------------------------------------------------------------
- Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
+ Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
- Concurrent bitmap that can set/reset sequences of bits atomically,
- represented as an array of fields where each field is a machine word (`size_t`)
- There are two api's; the standard one cannot have sequences that cross
- between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
- The `_across` postfixed functions do allow sequences that can cross over
- between the fields. (This is used in arena allocation)
+ Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */
#include "mimalloc.h"
@@ -21,399 +14,586 @@ between the fields. (This is used in arena allocation)
#include "mimalloc/bits.h"
#include "bitmap.h"
/* -----------------------------------------------------------
Bitmap definition
----------------------------------------------------------- */
/* --------------------------------------------------------------------------------
bfields
-------------------------------------------------------------------------------- */
// The bit mask for a given number of blocks at a specified bit index.
static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) {
mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
mi_assert_internal(count > 0);
if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
if (count == 0) return 0;
return ((((size_t)1 << count) - 1) << bitidx);
static inline size_t mi_bfield_ctz(mi_bfield_t x) {
return mi_ctz(x);
}
static inline size_t mi_bfield_clz(mi_bfield_t x) {
return mi_clz(x);
}
// find the least significant bit that is set (i.e. count trailing zeros)
// return false if `x==0` (with `*idx` undefined), and true otherwise,
// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) {
return mi_bsf(x,idx);
}
static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) {
return mi_rotr(x,r);
}
// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0).
static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal(idx < MI_BFIELD_BITS);
const mi_bfield_t mask = ((mi_bfield_t)1)<<idx;
if (set) {
const mi_bfield_t old = mi_atomic(fetch_or_explicit)(b, mask, mi_memory_order(acq_rel));
return ((old&mask) == 0);
}
else {
mi_bfield_t old = mi_atomic(fetch_and_explicit)(b, ~mask, mi_memory_order(acq_rel));
return ((old&mask) == mask);
}
}
// Set/clear a mask set of bits atomically, and return true if the mask bits transitioned from all 0's to 1's (or all 1's to 0's).
// `already_xset` is true if all bits for the mask were already set/cleared.
static bool mi_bfield_atomic_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_xset) {
mi_assert_internal(mask != 0);
if (set) {
mi_bfield_t old = *b;
while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits until success
*already_xset = ((old&mask) == mask);
return ((old&mask) == 0);
}
else { // clear
mi_bfield_t old = *b;
while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits until success
*already_xset = ((old&mask) == 0);
return ((old&mask) == mask);
}
}
// Tries to set/clear a bit atomically, and returns true if the bit atomically transitioned from 0 to 1 (or 1 to 0)
static bool mi_bfield_atomic_try_xset( mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal(idx < MI_BFIELD_BITS);
// for a single bit, we can always just set/clear and test afterwards whether it was actually us that changed it first
return mi_bfield_atomic_xset(set, b, idx);
}
// Tries to (un)set a mask atomically, and returns true if the mask bits atomically transitioned from 0 to mask (or mask to 0)
// and false otherwise (leaving the bit field as is).
static bool mi_bfield_atomic_try_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask ) {
mi_assert_internal(mask != 0);
if (set) {
mi_bfield_t old = *b;
do {
if ((old&mask) != 0) return false; // the mask bits are no longer 0
} while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits
return true;
}
else { // clear
mi_bfield_t old = *b;
do {
if ((old&mask) != mask) return false; // the mask bits are no longer set
} while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits
return true;
}
}
/* -----------------------------------------------------------
Claim a bit sequence atomically
----------------------------------------------------------- */
// Check if all bits corresponding to a mask are set/cleared.
static bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
mi_assert_internal(mask != 0);
if (set) {
return ((*b & mask) == mask);
}
else {
return ((*b & mask) == 0);
}
}
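The `xset` naming folds set and clear into one code path; a hedged sketch of the duality (illustrative, assuming the `MI_BIT_SET`/`MI_BIT_CLEAR` values of `mi_bit_t`; not part of this commit):

  _Atomic(mi_bfield_t) b = 0;
  bool t1 = mi_bfield_atomic_xset(MI_BIT_SET,   &b, 3);  // true:  bit 3 went 0 -> 1
  bool t2 = mi_bfield_atomic_xset(MI_BIT_SET,   &b, 3);  // false: bit 3 was already 1
  bool t3 = mi_bfield_atomic_xset(MI_BIT_CLEAR, &b, 3);  // true:  bit 3 went 1 -> 0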
// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
mi_assert_internal(bitmap_idx != NULL);
mi_assert_internal(count <= MI_BITMAP_FIELD_BITS);
mi_bitmap_field_t* field = &bitmap[idx];
size_t map = mi_atomic_load_relaxed(field);
if (map==MI_BITMAP_FIELD_FULL) return false; // short cut
// Tries to set/clear a byte atomically, and returns true if the byte atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise (leaving the bit field as is).
static bool mi_bfield_atomic_try_xset8(mi_bit_t set, _Atomic(mi_bfield_t)* b, size_t byte_idx ) {
mi_assert_internal(byte_idx < MI_BFIELD_SIZE);
const mi_bfield_t mask = ((mi_bfield_t)0xFF)<<(byte_idx*8);
return mi_bfield_atomic_try_xset_mask(set,b,mask);
}
// search for 0-bit sequence of length count
const size_t mask = mi_bitmap_mask_(count, 0);
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
#if MI_HAS_FAST_BITSCAN
size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible
#else
size_t bitidx = 0; // otherwise start at 0
#endif
size_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx
/* --------------------------------------------------------------------------------
bitmap chunks
-------------------------------------------------------------------------------- */
// scan linearly for a free range of zero bits
while (bitidx <= bitidx_max) {
const size_t mapm = (map & m);
if (mapm == 0) { // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
const size_t newmap = (map | m);
mi_assert_internal((newmap^map) >> bitidx == mask);
if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here?
// no success, another thread claimed concurrently.. keep going (with updated `map`)
continue;
static bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx ) {
mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS;
return mi_bfield_atomic_try_xset( set, &chunk->bfields[i], idx);
}
static bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx ) {
mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS);
const size_t i = byte_idx / MI_BFIELD_SIZE;
const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE;
return mi_bfield_atomic_try_xset8( set, &chunk->bfields[i], ibyte_idx);
}
// Set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0)
static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* palready_xset) {
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(n>0);
bool all_transition = true;
bool all_already_xset = true;
size_t idx = cidx % MI_BFIELD_BITS;
size_t field = cidx / MI_BFIELD_BITS;
while (n > 0) {
size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
if (m > n) { m = n; }
mi_assert_internal(idx + m <= MI_BFIELD_BITS);
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
bool already_xset;
all_transition = all_transition && mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
all_already_xset = all_already_xset && already_xset;
// next field
field++;
idx = 0;
n -= m;
}
*palready_xset = all_already_xset;
return all_transition;
}
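Worked split (illustrative, assuming MI_BFIELD_BITS == 64): xset-ing n = 10 bits at cidx = 60 touches two bfields:

  bfields[0]: mask = 0xF000000000000000   // m = 64 - 60 = 4 bits at idx 60
  bfields[1]: mask = 0x000000000000003F   // the remaining 6 bits at idx 0

and `*palready_xset` ends up true only if both partial masks were already set (resp. cleared).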
// Check if a sequence of `n` bits within a chunk are all set/cleared.
static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(n>0);
bool all_xset = true;
size_t idx = cidx % MI_BFIELD_BITS;
size_t field = cidx / MI_BFIELD_BITS;
while (n > 0) {
size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
if (m > n) { m = n; }
mi_assert_internal(idx + m <= MI_BFIELD_BITS);
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
all_xset = all_xset && mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[field], mask);
// next field
field++;
idx = 0;
n -= m;
}
return all_xset;
}
// Try to atomically set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0),
// and false otherwise leaving all bit fields as is.
static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(n>0);
if (n==0) return true;
size_t start_idx = cidx % MI_BFIELD_BITS;
size_t start_field = cidx / MI_BFIELD_BITS;
size_t end_field = MI_BITMAP_CHUNK_FIELDS;
size_t mask_mid = 0;
size_t mask_end = 0;
// first field
size_t field = start_field;
size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field
if (m > n) { m = n; }
mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS);
const size_t mask_start = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << start_idx);
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start)) return false;
// done?
n -= m;
if (n==0) return true;
// continue with mid fields and last field: if these fail we need to recover by unsetting previous fields
// mid fields
while (n >= MI_BFIELD_BITS) {
field++;
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
mask_mid = ~MI_ZU(0);
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore;
n -= MI_BFIELD_BITS;
}
// last field
if (n > 0) {
mi_assert_internal(n < MI_BFIELD_BITS);
field++;
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
end_field = field;
mask_end = (MI_ZU(1)<<n)-1;
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_end)) goto restore;
}
return true;
restore:
// field is on the field that failed to set atomically; we need to restore all previous fields
mi_assert_internal(field > start_field);
while( field > start_field) {
field--;
const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid));
bool already_xset;
mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, &already_xset);
}
return false;
}
// find the least significant 1-bit in a chunk and try to clear it atomically,
// setting `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
// todo: try neon version
static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
while(true) {
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
if (_mm256_testz_si256(vec,vec)) return false; // vec == 0 ?
const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // (elem64 == 0 ? -1 : 0)
const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits in the mask will be all 1 or all 0)
mi_assert_internal(mask != 0);
const size_t chunk_idx = _tzcnt_u32(mask) / 8; // tzcnt == 0, 8, 16, or 24
mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
size_t cidx;
if (mi_bfield_find_least_bit(chunk->bfields[chunk_idx],&cidx)) { // find the bit that is set
if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[chunk_idx], cidx)) { // unset atomically
*pidx = (chunk_idx*MI_BFIELD_BITS) + cidx;
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
return true;
}
}
// try again
}
#else
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
size_t idx;
if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i],&idx)) { // find least 1-bit
if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[i],idx)) { // try unset atomically
*pidx = (i*MI_BFIELD_BITS + idx);
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
return true;
}
}
}
return false;
#endif
}
// find the least byte in a chunk with all bits set (0xFF), and try to clear it atomically,
// set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
// todo: try neon version
static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, size_t* pidx) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
while(true) {
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0)
const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte
if (mask == 0) return false;
const size_t i = _tzcnt_u32(mask);
mi_assert_internal(8*i < MI_BITMAP_CHUNK_BITS);
const size_t chunk_idx = i / MI_BFIELD_SIZE;
const size_t byte_idx = i % MI_BFIELD_SIZE;
if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[chunk_idx],byte_idx)) { // try to unset atomically
*pidx = (chunk_idx*MI_BFIELD_BITS) + (byte_idx*8);
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
return true;
}
// try again
}
#else
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
const mi_bfield_t x = chunk->bfields[i];
// has_set8 has low bit in each byte set if the byte in x == 0xFF
const mi_bfield_t has_set8 = ((~x - MI_BFIELD_LO_BIT8) & // high bit set if byte in x is 0xFF or < 0x7F
(x & MI_BFIELD_HI_BIT8)) // high bit set if byte in x is >= 0x80
>> 7; // shift high bit to low bit
size_t idx;
if mi_unlikely(mi_bfield_find_least_bit(has_set8,&idx)) { // find least 1-bit
mi_assert_internal(idx <= (MI_BFIELD_BITS - 8));
mi_assert_internal((idx%8)==0);
const size_t byte_idx = idx/8;
if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[i],byte_idx)) { // unset the byte atomically
*pidx = (i*MI_BFIELD_BITS) + idx;
mi_assert_internal(*pidx + 8 <= MI_BITMAP_CHUNK_BITS);
return true;
}
// else continue
}
}
return false;
#endif
}
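The `has_set8` expression is a SWAR test for all-ones bytes; a per-byte check (illustrative, taking MI_BFIELD_LO_BIT8/MI_BFIELD_HI_BIT8 to be the 0x01...01 and 0x80...80 byte patterns, and ignoring cross-byte borrows, which only matter above the least significant 0xFF byte):

  x == 0xFF: ~x == 0x00, 0x00 - 0x01 wraps to 0xFF  -> high bit set; x & 0x80 set; AND then >> 7 gives 0x01
  x == 0x7F: ~x == 0x80, 0x80 - 0x01 == 0x7F        -> high bit clear; the result byte is 0

so only bytes equal to 0xFF contribute a 1-bit in the low bit of their lane.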
// find a sequence of `n` bits in a chunk with all `n` bits set, and try to clear them atomically
// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
// todo: try avx2 and neon version
// todo: allow spanning across bfield boundaries?
static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
if (n == 0 || n > MI_BFIELD_BITS) return false; // TODO: allow larger?
const mi_bfield_t mask = (n==MI_BFIELD_BITS ? ~((mi_bfield_t)0) : (((mi_bfield_t)1) << n)-1);
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
mi_bfield_t b = chunk->bfields[i];
size_t bshift = 0;
size_t idx;
while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
b >>= idx;
bshift += idx;
if (bshift + n > MI_BFIELD_BITS) break; // the n-bit mask can no longer fit in this field
if ((b&mask) == mask) { // found a match
mi_assert_internal( ((mask << bshift) >> bshift) == mask );
if mi_likely(mi_bfield_atomic_try_xset_mask(MI_BIT_CLEAR,&chunk->bfields[i],mask<<bshift)) {
*pidx = (i*MI_BFIELD_BITS) + bshift;
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
mi_assert_internal(*pidx + n <= MI_BITMAP_CHUNK_BITS);
return true;
}
else {
// if failed to atomically commit, try again from this position
b = (chunk->bfields[i] >> bshift);
}
}
else {
// success, we claimed the bits!
*bitmap_idx = mi_bitmap_index_create(idx, bitidx);
return true;
// advance
const size_t ones = mi_bfield_ctz(~b); // skip all ones (since it didn't fit the mask)
mi_assert_internal(ones>0);
bshift += ones;
b >>= ones;
}
}
}
return false;
}
// are all bits in a bitmap chunk set?
static bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
return _mm256_test_all_ones(vec);
#else
// written like this for vectorization
mi_bfield_t x = chunk->bfields[0];
for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
x = x & chunk->bfields[i];
}
return (~x == 0);
#endif
}
// are all bits in a bitmap chunk clear?
static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
return _mm256_testz_si256( vec, vec );
#else
// written like this for vectorization
mi_bfield_t x = chunk->bfields[0];
for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
x = x | chunk->bfields[i];
}
return (x == 0);
#endif
}
/* --------------------------------------------------------------------------------
bitmap
-------------------------------------------------------------------------------- */
// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) {
if (!already_zero) {
_mi_memzero_aligned(bitmap, sizeof(*bitmap));
}
}
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
mi_assert_internal(n>0);
mi_assert_internal(idx + n<=MI_BITMAP_MAX_BITS);
// first chunk
size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
size_t m = MI_BITMAP_CHUNK_BITS - cidx;
if (m > n) { m = n; }
bool already_xset;
mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m, &already_xset);
// n can be large so use memset for efficiency for all in-between chunks
chunk_idx++;
n -= m;
const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS;
if (mid_chunks > 0) {
_mi_memset(&bitmap->chunks[chunk_idx], (set ? ~0 : 0), mid_chunks * (MI_BITMAP_CHUNK_BITS/8)); // set/clear all mid chunks at once
chunk_idx += mid_chunks;
n -= mid_chunks * MI_BITMAP_CHUNK_BITS;
}
// last chunk
if (n > 0) {
mi_assert_internal(n < MI_BITMAP_CHUNK_BITS);
mi_assert_internal(chunk_idx < MI_BFIELD_BITS); // one bit per chunk in the `any_set` bfield
mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], 0, n, &already_xset);
}
}
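Worked example (illustrative, assuming MI_BITMAP_CHUNK_BITS == 256): xset-ing n = 700 bits at idx = 200 proceeds as

  first chunk: bits 200..255 (m = 56) via mi_bitmap_chunk_xsetN
  mid chunks:  2 whole chunks (512 bits) via the _mi_memset fast path
  last chunk:  the remaining 700 - 56 - 512 = 132 bits starting at bit 0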
// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0),
// and false otherwise leaving the bitmask as is.
bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
return mi_bitmap_chunk_try_xset( set, &bitmap->chunks[chunk_idx], cidx);
}
// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is.
bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
mi_assert_internal(idx%8 == 0);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t byte_idx = (idx % MI_BITMAP_CHUNK_BITS)/8;
return mi_bitmap_chunk_try_xset8( set, &bitmap->chunks[chunk_idx],byte_idx);
}
// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
// and false otherwise leaving the bitmask as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
mi_assert_internal(n>0);
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
if (n==1) { return mi_bitmap_try_xset(set,bitmap,idx); }
if (n==8) { return mi_bitmap_try_xset8(set,bitmap,idx); }
mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
return mi_bitmap_chunk_try_xsetN( set, &bitmap->chunks[chunk_idx], cidx, n);
}
// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset) {
mi_assert_internal(n>0);
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
bool local_already_xset;
if (already_xset==NULL) { already_xset = &local_already_xset; }
// if (n==1) { return mi_bitmap_xset(set, bitmap, idx); }
// if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); }
mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
return mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n, already_xset);
}
// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
mi_assert_internal(n>0);
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
}
#define mi_bitmap_forall_set_chunks(bitmap,start,decl_chunk_idx) \
{ size_t _set_idx; \
size_t _start = start % MI_BFIELD_BITS; \
mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \
while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \
decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS;
#define mi_bitmap_forall_set_chunks_end() \
_start += _set_idx+1; /* so chunk_idx stays valid */ \
_any_set >>= _set_idx; /* skip scanned bits (and avoid UB with (idx+1)) */ \
_any_set >>= 1; \
} \
}
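A sketch of how the macro pair is used (illustrative; it mirrors `mi_bitmap_try_find_and_clear` below):

  mi_bitmap_forall_set_chunks(bitmap, start, size_t chunk_idx)
  {
    // the body runs once for every chunk whose bit in `bitmap->any_set` is set,
    // starting near `start` and wrapping around via the rotate
  }
  mi_bitmap_forall_set_chunks_end();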
// Find a set bit in a bitmap and atomically unset it. Returns true on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// The low `MI_BFIELD_BITS` bits of `start` are used to set the start point of the search
// (to reduce thread contention).
bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start) {
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx < MI_BITMAP_MAX_BITS);
return true;
}
else {
// on to the next bit range
#if MI_HAS_FAST_BITSCAN
mi_assert_internal(mapm != 0);
const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
mi_assert_internal(shift > 0 && shift <= count);
#else
const size_t shift = 1;
#endif
bitidx += shift;
m <<= shift;
}
}
// no bits found
return false;
}
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
size_t idx = start_field_idx;
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
if (idx >= bitmap_fields) { idx = 0; } // wrap
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
return true;
}
}
return false;
}
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
// mi_assert_internal((bitmap[idx] & mask) == mask);
const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
return ((prev & mask) == mask);
}
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
//mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); }
return ((prev & mask) == 0);
}
// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one.
static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
const size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
if (any_ones != NULL) { *any_ones = ((field & mask) != 0); }
return ((field & mask) == mask);
}
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
// Returns `true` if successful when all previous `count` bits were 0.
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
size_t expected = mi_atomic_load_relaxed(&bitmap[idx]);
do {
if ((expected & mask) != 0) return false;
}
while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask));
mi_assert_internal((expected & mask) == 0);
return true;
}
bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL);
}
bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
bool any_ones;
mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
return any_ones;
}
//--------------------------------------------------------------------------
// the `_across` functions work on bitmaps where sequences can cross over
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------
// Try to atomically claim a sequence of `count` bits starting from the field
// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
{
mi_assert_internal(bitmap_idx != NULL);
// check initial trailing zeros
mi_bitmap_field_t* field = &bitmap[idx];
size_t map = mi_atomic_load_relaxed(field);
const size_t initial = mi_clz(map); // count of initial zeros starting at idx
mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
if (initial == 0) return false;
if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us)
if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
// scan ahead
size_t found = initial;
size_t mask = 0; // mask bits for the final field
while(found < count) {
field++;
map = mi_atomic_load_relaxed(field);
const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS);
mask = mi_bitmap_mask_(mask_bits, 0);
if ((map & mask) != 0) return false; // some part is already claimed
found += mask_bits;
}
mi_assert_internal(field < &bitmap[bitmap_fields]);
// we found a range of contiguous zeros up to the final field; mask contains mask in the final field
// now try to claim the range atomically
mi_bitmap_field_t* const final_field = field;
const size_t final_mask = mask;
mi_bitmap_field_t* const initial_field = &bitmap[idx];
const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial;
const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx);
// initial field
size_t newmap;
field = initial_field;
map = mi_atomic_load_relaxed(field);
do {
newmap = (map | initial_mask);
if ((map & initial_mask) != 0) { goto rollback; };
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
// intermediate fields
while (++field < final_field) {
newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
map = 0;
if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; }
}
// final field
mi_assert_internal(field == final_field);
map = mi_atomic_load_relaxed(field);
do {
newmap = (map | final_mask);
if ((map & final_mask) != 0) { goto rollback; }
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
// claimed!
mi_stat_counter_increase(stats->arena_crossover_count,1);
*bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
return true;
rollback:
// roll back intermediate fields
// (we just failed to claim `field` so decrement first)
while (--field > initial_field) {
newmap = 0;
map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0);
mi_assert_internal(mi_atomic_load_relaxed(field) == map);
mi_atomic_store_release(field, newmap);
}
if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`)
map = mi_atomic_load_relaxed(field);
do {
mi_assert_internal((map & initial_mask) == initial_mask);
newmap = (map & ~initial_mask);
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
}
mi_stat_counter_increase(stats->arena_rollback_count,1);
// retry? (we make a recursive call instead of goto to be able to use const declarations)
if (retries <= 2) {
return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx, stats);
}
else {
return false;
}
}
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) {
mi_assert_internal(count > 0);
if (count <= 2) {
// we don't bother with crossover fields for small counts
return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
}
// visit the fields
size_t idx = start_field_idx;
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
if (idx >= bitmap_fields) { idx = 0; } // wrap
// first try to claim inside a field
/*
if (count <= MI_BITMAP_FIELD_BITS) {
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
return true;
// we may find that all are unset only on a second iteration but that is ok as
// _any_set is a conservative approximation.
if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
}
}
*/
// if that fails, then try to claim across fields
if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx, stats)) {
return true;
}
}
mi_bitmap_forall_set_chunks_end();
return false;
}
// Helper for masks across fields; returns the mid count, post_mask may be 0
static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) {
MI_UNUSED(bitmap_fields);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) {
*pre_mask = mi_bitmap_mask_(count, bitidx);
*mid_mask = 0;
*post_mask = 0;
mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields);
return 0;
}
else {
const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx;
mi_assert_internal(pre_bits < count);
*pre_mask = mi_bitmap_mask_(pre_bits, bitidx);
count -= pre_bits;
const size_t mid_count = (count / MI_BITMAP_FIELD_BITS);
*mid_mask = MI_BITMAP_FIELD_FULL;
count %= MI_BITMAP_FIELD_BITS;
*post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0));
mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields);
return mid_count;
}
}
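// (editorial worked example, assuming 64-bit fields) for bitidx == 40 and count == 200:
// pre_bits == 64 - 40 == 24, so `pre_mask` covers bits 40..63 of the first field;
// the remaining 176 bits give mid_count == 176/64 == 2 full fields of `mid_mask`,
// and `post_mask` covers the final 176%64 == 48 bits of the last field.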
// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ) {
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-8);
mi_assert_internal((*pidx % 8) == 0);
return true;
}
else {
if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
}
}
}
mi_bitmap_forall_set_chunks_end();
return false;
}
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
size_t idx = mi_bitmap_index_field(bitmap_idx);
size_t pre_mask;
size_t mid_mask;
size_t post_mask;
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_one = true;
mi_bitmap_field_t* field = &bitmap[idx];
size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part
if ((prev & pre_mask) != pre_mask) all_one = false;
while(mid_count-- > 0) {
prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part
if ((prev & mid_mask) != mid_mask) all_one = false;
}
if (post_mask!=0) {
prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part
if ((prev & post_mask) != post_mask) all_one = false;
}
return all_one;
}
// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ) {
// TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger
// TODO: allow spanning across chunk boundaries
if (n == 0 || n > MI_BFIELD_BITS) return false;
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-n);
return true;
}
else {
if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
}
}
}
mi_bitmap_forall_set_chunks_end();
return false;
}
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
size_t idx = mi_bitmap_index_field(bitmap_idx);
size_t pre_mask;
size_t mid_mask;
size_t post_mask;
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_zero = true;
bool any_zero = false;
_Atomic(size_t)*field = &bitmap[idx];
size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
if ((prev & pre_mask) != 0) all_zero = false;
if ((prev & pre_mask) != pre_mask) any_zero = true;
while (mid_count-- > 0) {
prev = mi_atomic_or_acq_rel(field++, mid_mask);
if ((prev & mid_mask) != 0) all_zero = false;
if ((prev & mid_mask) != mid_mask) any_zero = true;
}
if (post_mask!=0) {
prev = mi_atomic_or_acq_rel(field, post_mask);
if ((prev & post_mask) != 0) all_zero = false;
if ((prev & post_mask) != post_mask) any_zero = true;
}
if (pany_zero != NULL) { *pany_zero = any_zero; }
return all_zero;
}
// Returns `true` if all `count` bits were 1.
// `any_ones` is `true` if there was at least one bit set to one.
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
size_t idx = mi_bitmap_index_field(bitmap_idx);
size_t pre_mask;
size_t mid_mask;
size_t post_mask;
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_ones = true;
bool any_ones = false;
mi_bitmap_field_t* field = &bitmap[idx];
size_t prev = mi_atomic_load_relaxed(field++);
if ((prev & pre_mask) != pre_mask) all_ones = false;
if ((prev & pre_mask) != 0) any_ones = true;
while (mid_count-- > 0) {
prev = mi_atomic_load_relaxed(field++);
if ((prev & mid_mask) != mid_mask) all_ones = false;
if ((prev & mid_mask) != 0) any_ones = true;
}
if (post_mask!=0) {
prev = mi_atomic_load_relaxed(field);
if ((prev & post_mask) != post_mask) all_ones = false;
if ((prev & post_mask) != 0) any_ones = true;
}
if (pany_ones != NULL) { *pany_ones = any_ones; }
return all_ones;
}
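// (editorial note) the loads above are relaxed, so under concurrent claiming the
// all_ones/any_ones answers are only a snapshot and should be treated as advisory.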
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL);
}
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
bool any_ones;
mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
return any_ones;
}

View file

@ -6,105 +6,87 @@ terms of the MIT license. A copy of the license can be found in the file
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represented as an array of fields where each field is a machine word (`size_t`)
There are two api's; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
(this is used in region allocation)
The `_across` postfixed functions do allow sequences that can cross over
between the fields. (This is used in arena allocation)
Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITMAP_H
#define MI_BITMAP_H
/* -----------------------------------------------------------
Bitmap definition
----------------------------------------------------------- */
/* --------------------------------------------------------------------------------
Definitions
-------------------------------------------------------------------------------- */
#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE)
#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set
typedef size_t mi_bfield_t;
// An atomic bitmap of `size_t` fields
typedef _Atomic(size_t) mi_bitmap_field_t;
typedef mi_bitmap_field_t* mi_bitmap_t;
#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3)
#define MI_BFIELD_BITS (1 << MI_BFIELD_BITS_SHIFT)
#define MI_BFIELD_SIZE (MI_BFIELD_BITS/8)
#define MI_BFIELD_BITS_MOD_MASK (MI_BFIELD_BITS - 1)
#define MI_BFIELD_LO_BIT8 ((~(mi_bfield_t)0)/0xFF) // 0x01010101 ..
#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 ..
// A bitmap index is the index of the bit in a bitmap.
typedef size_t mi_bitmap_index_t;
#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1)
// Create a bit index.
static inline mi_bitmap_index_t mi_bitmap_index_create_ex(size_t idx, size_t bitidx) {
mi_assert_internal(bitidx <= MI_BITMAP_FIELD_BITS);
return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
}
static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
return mi_bitmap_index_create_ex(idx,bitidx);
}
// Get the field index from a bit index.
static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx / MI_BITMAP_FIELD_BITS);
}
// Get the bit index in a bitmap field
static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx % MI_BITMAP_FIELD_BITS);
}
// Get the full bit index
static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
return bitmap_idx;
}
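// (editorial worked example, assuming 64-bit size_t) mi_bitmap_index_create(2,5)
// yields 2*64 + 5 == 133; the accessors invert it: mi_bitmap_index_field(133) == 2,
// mi_bitmap_index_bit_in_field(133) == 5, and mi_bitmap_index_bit(133) == 133.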
/* -----------------------------------------------------------
Claim a bit sequence atomically
----------------------------------------------------------- */
// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
// Returns `true` if successful when all previous `count` bits were 0.
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero);
bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
typedef mi_decl_align(32) struct mi_bitmap_chunk_s {
_Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
} mi_bitmap_chunk_t;
//--------------------------------------------------------------------------
// the `_across` functions work on bitmaps where sequences can cross over
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------
typedef mi_decl_align(32) struct mi_bitmap_s {
mi_bitmap_chunk_t chunks[MI_BFIELD_BITS];
_Atomic(mi_bfield_t)any_set;
} mi_bitmap_t;
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats);
#define MI_BITMAP_MAX_BITS (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS) // 16k bits on 64bit, 8k bits on 32bit
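// (editorial check) with MI_BFIELD_BITS == 64 and MI_BITMAP_CHUNK_BITS == 256 (the
// size assumed by the AVX2 paths in bitmap.c) this is 64*256 == 16384 bits (16k);
// on 32-bit, 32*256 == 8192 bits (8k), matching the comment above.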
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
/* --------------------------------------------------------------------------------
Bitmap
-------------------------------------------------------------------------------- */
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);
typedef bool mi_bit_t;
#define MI_BIT_SET (true)
#define MI_BIT_CLEAR (false)
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero);
#endif
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
// If `already_xset` is not NULL, it is set to true if all the bits were already all set/cleared.
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset);
// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
// and false otherwise leaving the bitmask as is.
mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is.
mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
// and false otherwise leaving the bitmask as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
// Find a set bit in a bitmap and atomically unset it. Returns true on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// The low `MI_BFIELD_BITS` bits of `start` are used to set the start point of the search
// (to reduce thread contention).
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start);
// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx );
// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx );
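// (editorial sketch, not part of the header) how the new chunked API composes; the
// bitmap is assumed to live in caller-managed, zero-initialized memory:
static inline void example_bitmap_use(mi_bitmap_t* bitmap) {
mi_bitmap_init(bitmap, true /* already_zero */);
mi_bitmap_xsetN(MI_BIT_SET, bitmap, 0, 8, NULL); // set bits 0..7
size_t idx;
if (mi_bitmap_try_find_and_clear8(bitmap, 0, &idx)) { // finds that full byte and clears it
mi_assert_internal(idx == 0);
}
}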
#endif // MI_BITMAP_H

View file

@ -24,7 +24,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block);
// ------------------------------------------------------
// forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block);
static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_block_t* block);
// regular free of a (thread local) block pointer
// fast path written carefully to prevent spilling on the stack
@ -57,7 +57,7 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) {
mi_assert_internal(page!=NULL && p!=NULL);
size_t diff = (uint8_t*)p - page->page_start;
size_t diff = (uint8_t*)p - mi_page_start(page);
size_t adjust;
if mi_likely(page->block_size_shift != 0) {
adjust = diff & (((size_t)1 << page->block_size_shift) - 1);
@ -82,72 +82,55 @@ static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, vo
#endif
// free a local pointer (page parameter comes first for better codegen)
static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
MI_UNUSED(segment);
static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, void* p) mi_attr_noexcept {
mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(page, p) : (mi_block_t*)p);
mi_block_check_unguard(page, block, p);
mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */);
}
// free a pointer owned by another thread (page parameter comes first for better codegen)
static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, void* p) mi_attr_noexcept {
mi_block_t* const block = _mi_page_ptr_unalign(page, p); // don't check `has_aligned` flag to avoid a race (issue #865)
mi_block_check_unguard(page, block, p);
mi_free_block_mt(page, segment, block);
mi_free_block_mt(page, block);
}
// generic free (for runtime integration)
void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
if (is_local) mi_free_generic_local(page,segment,p);
else mi_free_generic_mt(page,segment,p);
void mi_decl_noinline _mi_free_generic(mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
if (is_local) mi_free_generic_local(page,p);
else mi_free_generic_mt(page,p);
}
// Get the segment data belonging to a pointer
// This is just a single `and` in release mode but does further checks in debug mode
// (and secure mode) to see if this was a valid pointer.
static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
static inline mi_page_t* mi_checked_ptr_page(const void* p, const char* msg)
{
MI_UNUSED(msg);
#if (MI_DEBUG>0)
MI_UNUSED_RELEASE(msg);
#if MI_DEBUG
if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0 && !mi_option_is_enabled(mi_option_guarded_precise)) {
_mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
return NULL;
}
#endif
mi_segment_t* const segment = _mi_ptr_segment(p);
if mi_unlikely(segment==NULL) return segment;
#if (MI_DEBUG>0)
if mi_unlikely(!mi_is_in_heap_region(p)) {
_mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
"(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) {
_mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
}
mi_page_t* const page = _mi_ptr_page(p);
#if MI_DEBUG
if (page == MI_PAGE_PTR_INVALID) {
_mi_error_message(EINVAL, "%s: invalid pointer: %p\n", msg, p);
}
#endif
#if (MI_DEBUG>0 || MI_SECURE>=4)
if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) {
_mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
return NULL;
}
#endif
return segment;
return page;
}
// Free a block
// Fast path written carefully to prevent register spilling on the stack
void mi_free(void* p) mi_attr_noexcept
{
mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
if mi_unlikely(segment==NULL) return;
const bool is_local = (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
mi_page_t* const page = _mi_segment_page_of(segment, p);
mi_page_t* const page = mi_checked_ptr_page(p,"mi_free");
if mi_unlikely(page==NULL) return;
const bool is_local = (_mi_prim_thread_id() == mi_page_thread_id(page));
if mi_likely(is_local) { // thread-local free?
if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
// thread-local, aligned, and not a full page
@ -156,12 +139,12 @@ void mi_free(void* p) mi_attr_noexcept
}
else {
// page is full or contains (inner) aligned blocks; use generic path
mi_free_generic_local(page, segment, p);
mi_free_generic_local(page, p);
}
}
else {
// not thread-local; use generic path
mi_free_generic_mt(page, segment, p);
mi_free_generic_mt(page, p);
}
}
@ -169,10 +152,8 @@ void mi_free(void* p) mi_attr_noexcept
bool _mi_free_delayed_block(mi_block_t* block) {
// get segment and page
mi_assert_internal(block!=NULL);
const mi_segment_t* const segment = _mi_ptr_segment(block);
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(_mi_thread_id() == segment->thread_id);
mi_page_t* const page = _mi_segment_page_of(segment, block);
mi_page_t* const page = mi_checked_ptr_page(block,"_mi_free_delayed_block");
mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page));
// Clear the no-delayed flag so delayed freeing is used again for this page.
// This must be done before collecting the free lists on this page -- otherwise
@ -242,20 +223,19 @@ static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block
}
// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block)
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
// first see if the segment was abandoned and if we can reclaim it into our thread
if (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 &&
#if MI_HUGE_PAGE_ABANDON
segment->page_kind != MI_PAGE_HUGE &&
#endif
mi_atomic_load_relaxed(&segment->thread_id) == 0 && // segment is abandoned?
// first see if the page was abandoned and if we can reclaim it into our thread
if (mi_page_is_abandoned(page) &&
(_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 ||
mi_page_is_singleton(page) // only one block, and we are free-ing it
) &&
mi_prim_get_default_heap() != (mi_heap_t*)&_mi_heap_empty) // and we did not already exit this thread (without this check, a fresh heap will be initialized (issue #944))
{
// the segment is abandoned, try to reclaim it into our heap
if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) {
mi_assert_internal(_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
mi_assert_internal(mi_heap_get_default()->tld->segments.subproc == segment->subproc);
// the page is abandoned, try to reclaim it into our heap
if (_mi_heap_try_reclaim(mi_heap_get_default(), page)) { // TODO: avoid putting it in the full free queue
mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page));
// mi_assert_internal(mi_heap_get_default()->tld->subproc == page->subproc);
mi_free(block); // recursively free as now it will be a local free in our heap
return;
}
@ -272,17 +252,12 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* seg
// for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
_mi_padding_shrink(page, block, sizeof(mi_block_t));
if (segment->page_kind == MI_PAGE_HUGE) {
#if MI_HUGE_PAGE_ABANDON
// huge page segments are always abandoned and can be freed immediately
_mi_segment_huge_page_free(segment, page, block);
return;
#else
if (mi_page_is_huge(page)) {
mi_assert_internal(mi_page_is_singleton(page));
// huge pages are special as they occupy the entire segment
// as these are large we reset the memory occupied by the page so it is available to other threads
// (as the owning thread needs to actually free the memory later).
_mi_segment_huge_page_reset(segment, page, block);
#endif
_mi_os_reset(mi_page_start(page), mi_page_block_size(page), NULL); // resets conservatively
}
else {
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
@ -316,9 +291,8 @@ static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* p
}
static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
if mi_unlikely(segment==NULL) return 0;
const mi_page_t* const page = _mi_segment_page_of(segment, p);
const mi_page_t* const page = mi_checked_ptr_page(p,msg);
if mi_unlikely(page==NULL) return 0;
if mi_likely(!mi_page_has_aligned(page)) {
const mi_block_t* block = (const mi_block_t*)p;
return mi_page_usable_size_of(page, block);
@ -514,20 +488,20 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
// only maintain stats for smaller objects if requested
#if (MI_STAT>0)
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
#if (MI_STAT < 2)
MI_UNUSED(block);
#endif
mi_heap_t* const heap = mi_heap_get_default();
const size_t bsize = mi_page_usable_block_size(page);
#if (MI_STAT>1)
const size_t usize = mi_page_usable_size_of(page, block);
mi_heap_stat_decrease(heap, malloc, usize);
#endif
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
mi_heap_stat_decrease(heap, normal, bsize);
#if (MI_STAT > 1)
mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1);
#endif
}
else {
const size_t bpsize = mi_page_block_size(page); // match stat in page.c:mi_huge_page_alloc

View file

@ -7,11 +7,8 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h" // mi_prim_get_default_heap
#include <string.h> // memset, memcpy
#if defined(_MSC_VER) && (_MSC_VER < 1920)
#pragma warning(disable:4204) // non-constant aggregate initializer
#endif
@ -258,7 +255,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
mi_assert_internal(mi_heap_is_initialized(heap));
// TODO: copy full empty heap instead?
memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
_mi_memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
_mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
heap->thread_delayed_free = NULL;
heap->page_count = 0;

View file

@ -59,6 +59,10 @@ size_t _mi_os_large_page_size(void) {
return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
}
size_t _mi_os_virtual_address_bits(void) {
return mi_os_mem_config.virtual_address_bits;
}
bool _mi_os_use_large_page(size_t size, size_t alignment) {
// if we have access, check the size and alignment requirements
if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false;
@ -103,58 +107,10 @@ static void* mi_align_down_ptr(void* p, size_t alignment) {
return (void*)_mi_align_down((uintptr_t)p, alignment);
}
/* -----------------------------------------------------------
aligned hinting
-------------------------------------------------------------- */
// On systems with enough virtual address bits, we can do efficient aligned allocation by using
// the 2TiB to 30TiB area to allocate those. If we have at least 46 bits of virtual address
// space (64TiB) we use this technique. (but see issue #939)
#if (MI_INTPTR_SIZE >= 8) && !defined(MI_NO_ALIGNED_HINT)
static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;
// Return a MI_SEGMENT_SIZE aligned address that is probably available.
// If this returns NULL, the OS will determine the address but on some OS's that may not be
// properly aligned which can be more costly as it needs to be adjusted afterwards.
// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
// in the middle of the 2TiB - 6TiB address range (see issue #372))
#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start
#define MI_HINT_AREA ((uintptr_t)4 << 40) // up to 6TiB (since before win8 there is "only" 8TiB available to processes)
#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size)
{
if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
if (mi_os_mem_config.virtual_address_bits < 46) return NULL; // < 64TiB virtual address space
size = _mi_align_up(size, MI_SEGMENT_SIZE);
if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
#if (MI_SECURE>0)
size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
#endif
uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize
uintptr_t init = MI_HINT_BASE;
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB
#endif
uintptr_t expected = hint + size;
mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
}
if (hint%try_alignment != 0) return NULL;
return (void*)hint;
}
#else
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
MI_UNUSED(try_alignment); MI_UNUSED(size);
return NULL;
}
#endif
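// (editorial note on the removed hint code above) the randomized start is
// MI_HINT_BASE + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA: 20 random
// bits scaled by 4MiB cover 0..4TiB, folded into the 2TiB..6TiB hint window, and
// each allocation then bumps `aligned_base` monotonically until it wraps at MI_HINT_MAX.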
/* -----------------------------------------------------------
@ -380,12 +336,10 @@ void* _mi_os_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
----------------------------------------------------------- */
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats) {
mi_assert(offset <= MI_SEGMENT_SIZE);
mi_assert(offset <= size);
mi_assert((alignment % _mi_os_page_size()) == 0);
*memid = _mi_memid_none();
if (stats == NULL) stats = &_mi_stats_main;
if (offset > MI_SEGMENT_SIZE) return NULL;
if (offset == 0) {
// regular aligned allocation
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, stats);
@ -605,7 +559,6 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
#endif
}
end = start + size;
mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
} while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));
if (total_size != NULL) *total_size = size;

90
src/page-map.c Normal file
View file

@ -0,0 +1,90 @@
/*----------------------------------------------------------------------------
Copyright (c) 2023-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "bitmap.h"
mi_decl_cache_align signed char* _mi_page_map = NULL;
static bool mi_page_map_all_committed = false;
static size_t mi_blocks_per_commit_bit = 1;
static mi_memid_t mi_page_map_memid;
static mi_bitmap_t mi_page_map_commit;
static bool mi_page_map_init(void) {
size_t vbits = _mi_os_virtual_address_bits();
if (vbits >= 48) vbits = 47;
// 1 byte per block = 2 GiB for 128 TiB address space (48 bit = 256 TiB address space)
// 64 KiB for 4 GiB address space (on 32-bit)
const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_BLOCK_SHIFT));
const size_t min_commit_size = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);
mi_blocks_per_commit_bit = mi_block_count_of_size(min_commit_size);
mi_page_map_all_committed = _mi_os_has_overcommit(); // commit on-access on Linux systems
_mi_page_map = (int8_t*)_mi_os_alloc_aligned(page_map_size, 0, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
if (_mi_page_map==NULL) {
_mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
return false;
}
if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) {
_mi_warning_message("the page map was committed on-demand but not zero initialized!\n");
_mi_memzero_aligned(_mi_page_map, page_map_size);
}
return true;
}
static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* block_count) {
size_t page_size;
*page_start = mi_page_area(page, &page_size);
if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE; } // furthest interior pointer
*block_count = mi_block_count_of_size(page_size);
return ((uintptr_t)*page_start >> MI_ARENA_BLOCK_SHIFT);
}
void _mi_page_map_register(mi_page_t* page) {
if mi_unlikely(_mi_page_map == NULL) {
if (!mi_page_map_init()) return;
}
uint8_t* page_start;
size_t block_count;
const size_t idx = mi_page_map_get_idx(page, &page_start, &block_count);
// is the page map area that contains the page address committed?
if (!mi_page_map_all_committed) {
const size_t commit_bit_count = _mi_divide_up(block_count, mi_blocks_per_commit_bit);
const size_t commit_bit_idx = idx / mi_blocks_per_commit_bit;
for (size_t i = 0; i < commit_bit_count; i++) { // per bit to avoid crossing over bitmap chunks
if (mi_bitmap_is_xsetN(MI_BIT_CLEAR, &mi_page_map_commit, commit_bit_idx + i, 1)) {
// this may race, in which case we do multiple commits (which is ok)
_mi_os_commit(page_start + (i*mi_blocks_per_commit_bit*MI_ARENA_BLOCK_SIZE), mi_blocks_per_commit_bit* MI_ARENA_BLOCK_SIZE, NULL, NULL);
mi_bitmap_xsetN(MI_BIT_SET, &mi_page_map_commit, commit_bit_idx + i, 1, NULL);
}
}
}
// set the offsets
for (size_t i = 0; i < block_count; i++) {
mi_assert_internal(i < 128);
_mi_page_map[idx + i] = (int8_t)(-i-1);
}
}
void _mi_page_map_unregister(mi_page_t* page) {
mi_assert_internal(_mi_page_map != NULL);
// get index and count
uint8_t* page_start;
size_t block_count;
const size_t idx = mi_page_map_get_idx(page, &page_start, &block_count);
// unset the offsets
_mi_memzero(_mi_page_map + idx, block_count);
}
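// (editorial sketch) the inverse lookup this map enables, derived from the `-i-1`
// encoding written by _mi_page_map_register above; the real accessor (`_mi_ptr_page`
// in the internal headers) may differ in detail:
static inline void* example_page_start_of(const void* p) {
const size_t idx = ((uintptr_t)p >> MI_ARENA_BLOCK_SHIFT);
const ptrdiff_t ofs = _mi_page_map[idx]; // -1 at a page's first block, -2 at the next, ...
return (void*)(((uintptr_t)(idx + ofs + 1)) << MI_ARENA_BLOCK_SHIFT);
}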

View file

@ -59,7 +59,7 @@ static inline uint8_t* mi_page_area(const mi_page_t* page) {
static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
size_t psize;
uint8_t* page_area = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
uint8_t* page_area = mi_page_area(page, &psize);
mi_block_t* start = (mi_block_t*)page_area;
mi_block_t* end = (mi_block_t*)(page_area + psize);
while(p != NULL) {
@ -83,10 +83,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
mi_assert_internal(page->capacity <= page->reserved);
// const size_t bsize = mi_page_block_size(page);
mi_segment_t* segment = _mi_page_segment(page);
uint8_t* start = mi_page_start(page);
mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL));
mi_assert_internal(page->is_huge == (segment->page_kind == MI_PAGE_HUGE));
//mi_assert_internal(start + page->capacity*page->block_size == page->top);
mi_assert_internal(mi_page_list_is_valid(page,page->free));
@ -122,15 +119,11 @@ bool _mi_page_is_valid(mi_page_t* page) {
mi_assert_internal(page->keys[0] != 0);
#endif
if (mi_page_heap(page)!=NULL) {
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0);
#if MI_HUGE_PAGE_ABANDON
if (segment->page_kind != MI_PAGE_HUGE)
#endif
mi_assert_internal(!_mi_process_is_initialized || page->thread_id == mi_page_heap(page)->thread_id || page->thread_id==0);
{
mi_page_queue_t* pq = mi_page_queue_of(page);
mi_assert_internal(mi_page_queue_contains(pq, page));
mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page));
mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_MAX_OBJ_SIZE || mi_page_is_in_full(page));
mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq));
}
}
@ -274,16 +267,13 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
#if !MI_HUGE_PAGE_ABANDON
mi_assert_internal(pq != NULL);
mi_assert_internal(mi_heap_contains_queue(heap, pq));
mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_OBJ_SIZE_MAX || block_size == pq->block_size);
mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_MAX_OBJ_SIZE || block_size == pq->block_size);
#endif
mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os);
mi_page_t* page = _mi_heap_page_alloc(heap, block_size, page_alignment);
if (page == NULL) {
// this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
return NULL;
}
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
#endif
mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size);
// a fresh page was found, initialize it
const size_t full_block_size = (pq == NULL || mi_page_is_huge(page) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc
@ -384,7 +374,6 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
mi_heap_t* pheap = mi_page_heap(page);
// remove from our page list
mi_segments_tld_t* segments_tld = &pheap->tld->segments;
mi_page_queue_remove(pq, page);
// page is no longer associated with our heap
@ -399,8 +388,8 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
#endif
// and abandon it
mi_assert_internal(mi_page_heap(page) == NULL);
_mi_segment_page_abandon(page,segments_tld);
mi_assert_internal(mi_page_is_abandoned(page));
_mi_arena_page_abandon(page,&pheap->tld);
}
// force abandon a page
@ -411,8 +400,7 @@ void _mi_page_force_abandon(mi_page_t* page) {
// ensure this page is no longer in the heap delayed free list
_mi_heap_delayed_free_all(heap);
// We can still access the page meta-info even if it is freed as we ensure
// in `mi_segment_force_abandon` that the segment is not freed (yet)
// TODO: can we still access the page meta-info even if it is freed?
if (page->capacity == 0) return; // it may have been freed now
// and now unlink it from the page queue and abandon (or free)
@ -433,17 +421,18 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
mi_assert_internal(mi_page_all_free(page));
mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING);
mi_heap_t* pheap = mi_page_heap(page);
// no more aligned blocks in here
mi_page_set_has_aligned(page, false);
// remove from the page list
// (no need to do _mi_heap_delayed_free first as all blocks are already free)
mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments;
mi_page_queue_remove(pq, page);
// and free it
mi_page_set_heap(page,NULL);
_mi_segment_page_free(page, force, segments_tld);
_mi_arena_page_free(page, force, &pheap->tld);
}
#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE
@ -474,7 +463,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue?
if (pq->last==page && pq->first==page) { // the only page in the queue?
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
mi_heap_t* heap = mi_page_heap(page);
mi_assert_internal(pq >= heap->pages);
const size_t index = pq - heap->pages;
@ -639,7 +628,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
size_t page_size;
//uint8_t* page_start =
_mi_segment_page_start(_mi_page_segment(page), page, &page_size);
mi_page_area(page, &page_size);
mi_stat_counter_increase(tld->stats.pages_extended, 1);
// calculate the extend count
@ -676,15 +665,13 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
// Initialize a fresh page
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) {
mi_assert(page != NULL);
mi_segment_t* segment = _mi_page_segment(page);
mi_assert(segment != NULL);
mi_assert_internal(block_size > 0);
// set fields
mi_page_set_heap(page, heap);
page->block_size = block_size;
size_t page_size;
page->page_start = _mi_segment_page_start(segment, page, &page_size);
mi_track_mem_noaccess(page->page_start,page_size);
uint8_t* page_start = mi_page_area(page, &page_size);
mi_track_mem_noaccess(page_start,page_size);
mi_assert_internal(page_size / block_size < (1L<<16));
page->reserved = (uint16_t)(page_size / block_size);
mi_assert_internal(page->reserved > 0);
@ -692,15 +679,15 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
page->keys[0] = _mi_heap_random_next(heap);
page->keys[1] = _mi_heap_random_next(heap);
#endif
page->free_is_zero = page->is_zero_init;
page->free_is_zero = page->memid.initially_zero;
#if MI_DEBUG>2
if (page->is_zero_init) {
if (page->memid.initially_zero) {
mi_track_mem_defined(page->page_start, page_size);
mi_assert_expensive(mi_mem_is_zero(page->page_start, page_size));
mi_assert_expensive(mi_mem_is_zero(page_start, page_size));
}
#endif
if (block_size > 0 && _mi_is_power_of_two(block_size)) {
page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size));
page->block_size_shift = (uint8_t)mi_ctz(block_size);
}
else {
page->block_size_shift = 0;
@ -734,13 +721,6 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
// search for a best next page to use for at most N pages (often cut short if immediate blocks are available)
#define MI_MAX_CANDIDATE_SEARCH (8)
// is the page not yet used up to its reserved space?
static bool mi_page_is_expandable(const mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_internal(page->capacity <= page->reserved);
return (page->capacity < page->reserved);
}
// Find a page with free blocks of `page->block_size`.
static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
@ -907,7 +887,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
#if MI_HUGE_PAGE_ABANDON
mi_page_queue_t* pq = NULL;
#else
mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_OBJ_SIZE_MAX+1); // always in the huge queue regardless of the block size
mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_MAX_OBJ_SIZE+1); // always in the huge queue regardless of the block size
mi_assert_internal(mi_page_queue_is_huge(pq));
#endif
mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment);
@ -915,10 +895,9 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
mi_assert_internal(mi_page_block_size(page) >= size);
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(mi_page_is_huge(page));
mi_assert_internal(_mi_page_segment(page)->page_kind == MI_PAGE_HUGE);
mi_assert_internal(_mi_page_segment(page)->used==1);
mi_assert_internal(mi_page_is_singleton(page));
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
mi_assert_internal(mi_page_is_abandoned(page));
mi_page_set_heap(page, NULL);
#endif
mi_heap_stat_increase(heap, huge, mi_page_block_size(page));
@ -933,7 +912,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept {
// huge allocation?
const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size`
if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) {
if mi_unlikely(req_size > (MI_LARGE_MAX_OBJ_SIZE - MI_PADDING_SIZE) || huge_alignment > 0) {
if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) {
_mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
return NULL;

View file

@ -20,7 +20,7 @@ terms of the MIT license. A copy of the license can be found in the file
// containing the whole library. If it is linked first
// it will override all the standard library allocation
// functions (on Unix's).
#include "alloc.c" // includes alloc-override.c
#include "alloc.c" // includes alloc-override.c and free.c
#include "alloc-aligned.c"
#include "alloc-posix.c"
#include "arena.c"
@ -31,6 +31,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include "options.c"
#include "os.c"
#include "page.c" // includes page-queue.c
#include "page-map.c"
#include "random.c"
#include "segment.c"
#include "segment-map.c"

View file

@ -1,599 +0,0 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/bits.h"
#include "xbitmap.h"
/* --------------------------------------------------------------------------------
bfields
-------------------------------------------------------------------------------- */
static inline size_t mi_bfield_ctz(mi_bfield_t x) {
return mi_ctz(x);
}
static inline size_t mi_bfield_clz(mi_bfield_t x) {
return mi_clz(x);
}
// find the least significant bit that is set (i.e. count trailing zero's)
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) {
return mi_bsf(x,idx);
}
static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) {
return mi_rotr(x,r);
}
// Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0).
static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal(idx < MI_BFIELD_BITS);
const mi_bfield_t mask = ((mi_bfield_t)1)<<idx;
if (set) {
const mi_bfield_t old = mi_atomic(fetch_or_explicit)(b, mask, mi_memory_order(acq_rel));
return ((old&mask) == 0);
}
else {
mi_bfield_t old = mi_atomic(fetch_and_explicit)(b, ~mask, mi_memory_order(acq_rel));
return ((old&mask) == mask);
}
}
// Set/clear a mask of bits atomically, and return true if the mask bits transitioned from all 0's to 1's (or all 1's to 0's)
// `already_xset` is true if all bits for the mask were already set/cleared.
static bool mi_bfield_atomic_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* already_xset) {
mi_assert_internal(mask != 0);
if (set) {
mi_bfield_t old = *b;
while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits until success
*already_xset = ((old&mask) == mask);
return ((old&mask) == 0);
}
else { // clear
mi_bfield_t old = *b;
while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits until success
*already_xset = ((old&mask) == 0);
return ((old&mask) == mask);
}
}
// Tries to set/clear a bit atomically, and returns true if the bit atomically transitioned from 0 to 1 (or 1 to 0)
static bool mi_bfield_atomic_try_xset( mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal(idx < MI_BFIELD_BITS);
// for a single bit, we can always just set/clear and test afterwards if it was actually us that changed it first
return mi_bfield_atomic_xset(set, b, idx);
}
// Tries to (un)set a mask atomically, and returns true if the mask bits atomically transitioned from 0 to mask (or mask to 0)
// and false otherwise (leaving the bit field as is).
static bool mi_bfield_atomic_try_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask ) {
mi_assert_internal(mask != 0);
if (set) {
mi_bfield_t old = *b;
do {
if ((old&mask) != 0) return false; // the mask bits are no longer 0
} while (!mi_atomic_cas_weak_acq_rel(b, &old, old|mask)); // try to atomically set the mask bits
return true;
}
else { // clear
mi_bfield_t old = *b;
do {
if ((old&mask) != mask) return false; // the mask bits are no longer set
} while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits
return true;
}
}
// Check if all bits corresponding to a mask are set/cleared.
static bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
mi_assert_internal(mask != 0);
if (set) {
return ((*b & mask) == mask);
}
else {
return ((*b & mask) == 0);
}
}
// Tries to set/clear a byte atomically, and returns true if the byte atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise (leaving the bit field as is).
static bool mi_bfield_atomic_try_xset8(mi_bit_t set, _Atomic(mi_bfield_t)* b, size_t byte_idx ) {
mi_assert_internal(byte_idx < MI_BFIELD_SIZE);
const mi_bfield_t mask = ((mi_bfield_t)0xFF)<<(byte_idx*8);
return mi_bfield_atomic_try_xset_mask(set,b,mask);
}
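// (editorial) e.g. byte_idx == 2 gives mask == 0xFF0000, selecting the third byte of the field.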
/* --------------------------------------------------------------------------------
bitmap chunks
-------------------------------------------------------------------------------- */
static bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx ) {
mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS;
return mi_bfield_atomic_try_xset( set, &chunk->bfields[i], idx);
}
static bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx ) {
mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS);
const size_t i = byte_idx / MI_BFIELD_SIZE;
const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE;
return mi_bfield_atomic_try_xset8( set, &chunk->bfields[i], ibyte_idx);
}
// Set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0)
static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, bool* palready_xset) {
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(n>0);
bool all_transition = true;
bool all_already_xset = true;
size_t idx = cidx % MI_BFIELD_BITS;
size_t field = cidx / MI_BFIELD_BITS;
while (n > 0) {
size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
if (m > n) { m = n; }
mi_assert_internal(idx + m <= MI_BFIELD_BITS);
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
bool already_xset;
all_transition = all_transition && mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
all_already_xset = all_already_xset && already_xset;
// next field
field++;
idx = 0;
n -= m;
}
*palready_xset = all_already_xset;
return all_transition;
}
// Check if a sequence of `n` bits within a chunk are all set/cleared.
static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(n>0);
bool all_xset = true;
size_t idx = cidx % MI_BFIELD_BITS;
size_t field = cidx / MI_BFIELD_BITS;
while (n > 0) {
size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field
if (m > n) { m = n; }
mi_assert_internal(idx + m <= MI_BFIELD_BITS);
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << idx);
all_xset = all_xset && mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[field], mask);
// next field
field++;
idx = 0;
n -= m;
}
return all_xset;
}
// Try to atomically set/clear a sequence of `n` bits within a chunk. Returns true if all bits transitioned from 0 to 1 (or 1 to 0),
// and false otherwise leaving all bit fields as is.
static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(n>0);
if (n==0) return true;
size_t start_idx = cidx % MI_BFIELD_BITS;
size_t start_field = cidx / MI_BFIELD_BITS;
size_t end_field = MI_BITMAP_CHUNK_FIELDS;
size_t mask_mid = 0;
size_t mask_end = 0;
// first field
size_t field = start_field;
size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field
if (m > n) { m = n; }
mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS);
const size_t mask_start = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<<m)-1) << start_idx);
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start)) return false;
// done?
n -= m;
if (n==0) return true;
// continue with mid fields and last field: if these fail we need to recover by unsetting previous fields
// mid fields
while (n >= MI_BFIELD_BITS) {
field++;
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
mask_mid = ~MI_ZU(0);
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore;
n -= MI_BFIELD_BITS;
}
// last field
if (n > 0) {
mi_assert_internal(n < MI_BFIELD_BITS);
field++;
mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
end_field = field;
mask_end = (MI_ZU(1)<<n)-1;
if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_end)) goto restore;
}
return true;
restore:
// field is on the field that failed to set atomically; we need to restore all previous fields
mi_assert_internal(field > start_field);
while( field > start_field) {
field--;
const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid));
bool already_xset;
mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, &already_xset);
}
return false;
}
// find least 1-bit in a chunk and try unset it atomically
// set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
// todo: try neon version
static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
while(true) {
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
if (_mm256_testz_si256(vec,vec)) return false; // vec == 0 ?
const __m256i vcmp = _mm256_cmpeq_epi64(vec, _mm256_setzero_si256()); // (elem64 == 0 ? -1 : 0)
const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits in the mask will be all 1 or all 0)
mi_assert_internal(mask != 0);
const size_t chunk_idx = _tzcnt_u32(mask) / 8; // tzcnt == 0, 8, 16, or 24
mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
size_t cidx;
if (mi_bfield_find_least_bit(chunk->bfields[chunk_idx],&cidx)) { // find the bit that is set
if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[chunk_idx], cidx)) { // unset atomically
*pidx = (chunk_idx*MI_BFIELD_BITS) + cidx;
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
return true;
}
}
// try again
}
#else
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
size_t idx;
if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i],&idx)) { // find least 1-bit
if mi_likely(mi_bfield_atomic_try_xset(MI_BIT_CLEAR,&chunk->bfields[i],idx)) { // try unset atomically
*pidx = (i*MI_BFIELD_BITS + idx);
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
return true;
}
}
}
return false;
#endif
}
// find least byte in a chunk with all bits set, and try unset it atomically
// set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
// todo: try neon version
static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, size_t* pidx) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
while(true) {
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0)
const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte
if (mask == 0) return false;
const size_t i = _tzcnt_u32(mask);
mi_assert_internal(8*i < MI_BITMAP_CHUNK_BITS);
const size_t chunk_idx = i / MI_BFIELD_SIZE;
const size_t byte_idx = i % MI_BFIELD_SIZE;
if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[chunk_idx],byte_idx)) { // try to unset atomically
*pidx = (chunk_idx*MI_BFIELD_BITS) + (byte_idx*8);
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
return true;
}
// try again
}
#else
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
const mi_bfield_t x = chunk->bfields[i];
// has_set8 has low bit in each byte set if the byte in x == 0xFF
const mi_bfield_t has_set8 = ((~x - MI_BFIELD_LO_BIT8) & // high bit set if byte in x is 0xFF or < 0x7F
(x & MI_BFIELD_HI_BIT8)) // high bit set if byte in x is >= 0x80
>> 7; // shift high bit to low bit
size_t idx;
if mi_unlikely(mi_bfield_find_least_bit(has_set8,&idx)) { // find least 1-bit
mi_assert_internal(idx <= (MI_BFIELD_BITS - 8));
mi_assert_internal((idx%8)==0);
const size_t byte_idx = idx/8;
if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[i],byte_idx)) { // unset the byte atomically
*pidx = (i*MI_BFIELD_BITS) + idx;
mi_assert_internal(*pidx + 8 <= MI_BITMAP_CHUNK_BITS);
return true;
}
// else continue
}
}
return false;
#endif
}
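/* Standalone check (not from the source) of the SWAR trick above, assuming
   64-bit bfields: `has_set8` gets the low bit of each byte set iff the
   corresponding byte of `x` is 0xFF; the least significant set bit is exact. */
#include <assert.h>
#include <stdint.h>

#define LO_BIT8 (~UINT64_C(0) / 0xFF)   // 0x0101010101010101
#define HI_BIT8 (LO_BIT8 << 7)          // 0x8080808080808080

static uint64_t has_set8(uint64_t x) {
  return ((~x - LO_BIT8) & (x & HI_BIT8)) >> 7;
}

int main(void) {
  assert(has_set8(UINT64_C(0x00FF00FF00FF00FF)) == UINT64_C(0x0001000100010001));
  assert(has_set8(~UINT64_C(0)) == LO_BIT8);
  assert(has_set8(UINT64_C(0x7F80FE01C3A50000)) == 0);  // no byte is exactly 0xFF
  return 0;
}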
// find a sequence of `n` bits in a chunk with all `n` bits set, and try to unset them atomically
// set `*pidx` to the start bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
// todo: try avx2 and neon version
// todo: allow spanning across bfield boundaries?
static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
if (n == 0 || n > MI_BFIELD_BITS) return false; // TODO: allow larger?
const mi_bfield_t mask = (n==MI_BFIELD_BITS ? ~((mi_bfield_t)0) : (((mi_bfield_t)1) << n)-1);
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
mi_bfield_t b = chunk->bfields[i];
size_t bshift = 0;
size_t idx;
while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
b >>= idx;
bshift += idx;
if (bshift + n > MI_BFIELD_BITS) break; // not enough bits left for a fit
if ((b&mask) == mask) { // found a match
mi_assert_internal( ((mask << bshift) >> bshift) == mask );
if mi_likely(mi_bfield_atomic_try_xset_mask(MI_BIT_CLEAR,&chunk->bfields[i],mask<<bshift)) {
*pidx = (i*MI_BFIELD_BITS) + bshift;
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
mi_assert_internal(*pidx + n <= MI_BITMAP_CHUNK_BITS);
return true;
}
else {
// if failed to atomically commit, try again from this position
b = (chunk->bfields[i] >> bshift);
}
}
else {
// advance
const size_t ones = mi_bfield_ctz(~b); // skip all ones (since it didn't fit the mask)
mi_assert_internal(ones>0);
bshift += ones;
b >>= ones;
}
}
}
return false;
}
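/* Non-atomic sketch (not from the source) of the scan above: find `n`
   consecutive 1-bits in a 64-bit word and return the start index, or -1.
   Assumes 0 < n <= 64 and a compiler with __builtin_ctzll (gcc/clang). */
#include <assert.h>
#include <stdint.h>

static int find_ones_run(uint64_t b, unsigned n) {
  const uint64_t mask = (n == 64 ? ~UINT64_C(0) : (UINT64_C(1) << n) - 1);
  unsigned bshift = 0;
  while (b != 0) {
    const unsigned idx = (unsigned)__builtin_ctzll(b);    // least significant 1-bit
    b >>= idx; bshift += idx;
    if (bshift + n > 64) return -1;                       // no room left for n bits
    if ((b & mask) == mask) return (int)bshift;           // n consecutive 1s found
    const unsigned ones = (unsigned)__builtin_ctzll(~b);  // skip the too-short run of 1s
    b >>= ones; bshift += ones;
  }
  return -1;
}

int main(void) {
  assert(find_ones_run(UINT64_C(0xF0), 4) == 4);
  assert(find_ones_run(~UINT64_C(0), 64) == 0);
  assert(find_ones_run(UINT64_C(0x5555555555555555), 2) == -1);
  return 0;
}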
// are all bits in a bitmap chunk set?
static bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
return _mm256_test_all_ones(vec);
#else
// written like this for vectorization
mi_bfield_t x = chunk->bfields[0];
for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
x = x & chunk->bfields[i];
}
return (~x == 0);
#endif
}
// are all bits in a bitmap chunk clear?
static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
return _mm256_testz_si256( vec, vec );
#else
// written like this for vectorization
mi_bfield_t x = chunk->bfields[0];
for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
x = x | chunk->bfields[i];
}
return (x == 0);
#endif
}
/* --------------------------------------------------------------------------------
bitmap
-------------------------------------------------------------------------------- */
// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) {
if (!already_zero) {
_mi_memzero_aligned(bitmap, sizeof(*bitmap));
}
}
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
mi_assert_internal(n>0);
mi_assert_internal(idx + n<=MI_BITMAP_MAX_BITS);
// first chunk
size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
size_t m = MI_BITMAP_CHUNK_BITS - cidx;
if (m > n) { m = n; }
bool already_xset;
mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, m, &already_xset);
// n can be large so use memset for efficiency for all in-between chunks
chunk_idx++;
n -= m;
const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS;
if (mid_chunks > 0) {
_mi_memset(&bitmap->chunks[chunk_idx], (set ? ~0 : 0), mid_chunks * (MI_BITMAP_CHUNK_BITS/8));
chunk_idx += mid_chunks;
n -= mid_chunks * MI_BITMAP_CHUNK_BITS;
}
// last chunk
if (n > 0) {
mi_assert_internal(n < MI_BITMAP_CHUNK_BITS);
mi_assert_internal(chunk_idx < MI_BFIELD_BITS);
mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], 0, n, &already_xset);
}
}
// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0),
// and false otherwise leaving the bitmask as is.
bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
return mi_bitmap_chunk_try_xset( set, &bitmap->chunks[chunk_idx], cidx);
}
// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is.
bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
mi_assert_internal(idx%8 == 0);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t byte_idx = (idx % MI_BITMAP_CHUNK_BITS)/8;
return mi_bitmap_chunk_try_xset8( set, &bitmap->chunks[chunk_idx],byte_idx);
}
// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
// and false otherwise leaving the bitmask as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
mi_assert_internal(n>0);
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
if (n==1) { return mi_bitmap_try_xset(set,bitmap,idx); }
if (n==8) { return mi_bitmap_try_xset8(set,bitmap,idx); }
mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
return mi_bitmap_chunk_try_xsetN( set, &bitmap->chunks[chunk_idx], cidx, n);
}
// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
// If `already_xset` is not NULL, it is set to true if all the bits were already set/cleared.
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset) {
mi_assert_internal(n>0);
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
bool local_already_xset;
if (already_xset==NULL) { already_xset = &local_already_xset; }
// if (n==1) { return mi_bitmap_xset(set, bitmap, idx); }
// if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); }
mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
return mi_bitmap_chunk_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n, already_xset);
}
// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
mi_assert_internal(n>0);
mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia
return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
}
#define mi_bitmap_forall_set_chunks(bitmap,start,decl_chunk_idx) \
{ size_t _set_idx; \
size_t _start = start % MI_BFIELD_BITS; \
mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start); \
while (mi_bfield_find_least_bit(_any_set,&_set_idx)) { \
decl_chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS;
#define mi_bitmap_forall_set_chunks_end() \
_start += _set_idx+1; /* so chunk_idx stays valid */ \
_any_set >>= _set_idx; /* skip scanned bits (two shifts, since shifting by (_set_idx+1) could be the full bit width, which is UB) */ \
_any_set >>= 1; \
} \
}
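/* Sketch (not from the source) of roughly what one use of the macro pair
   above expands to; `visit(chunk_idx)` stands for the user-supplied body: */
// { size_t _set_idx;
//   size_t _start = start % MI_BFIELD_BITS;
//   // rotate so that chunks at/after `start` are scanned first
//   mi_bfield_t _any_set = mi_bfield_rotate_right(bitmap->any_set, _start);
//   while (mi_bfield_find_least_bit(_any_set, &_set_idx)) {
//     size_t chunk_idx = (_set_idx + _start) % MI_BFIELD_BITS;  // undo the rotation
//     visit(chunk_idx);
//     _start += _set_idx + 1;   // keep chunk_idx valid on the next iteration
//     _any_set >>= _set_idx;    // two shifts, since shifting by _set_idx+1
//     _any_set >>= 1;           // could equal the bit width (UB in C)
//   }
// }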
// Find a set bit in a bitmap and atomically unset it. Returns true on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// `start` is taken modulo `MI_BFIELD_BITS` to give the chunk where the search starts
// (to reduce thread contention).
bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start) {
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx < MI_BITMAP_MAX_BITS);
return true;
}
else {
// we may find that all are unset only on a second iteration but that is ok as
// _any_set is a conservative approximation.
if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
}
}
}
mi_bitmap_forall_set_chunks_end();
return false;
}
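/* Hypothetical usage (not from the source): claim any free slot guarded by the
   bitmap, using e.g. a per-thread value as the start hint to spread contention. */
// size_t idx;
// if (mi_bitmap_try_find_and_clear(bitmap, &idx, my_start_hint)) {
//   // bit `idx` transitioned 1 -> 0 atomically: this thread now owns slot `idx`
//   // ... use the slot ...
//   mi_bitmap_xsetN(MI_BIT_SET, bitmap, idx, 1, NULL);  // release it again
// }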
// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx ) {
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-8);
mi_assert_internal((*pidx % 8) == 0);
return true;
}
else {
if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
}
}
}
mi_bitmap_forall_set_chunks_end();
return false;
}
// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx ) {
// TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger
// TODO: allow spanning across chunk boundaries
if (n == 0 || n > MI_BFIELD_BITS) return false;
mi_bitmap_forall_set_chunks(bitmap,start,size_t chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-n);
return true;
}
else {
if (mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bfield_atomic_xset(MI_BIT_CLEAR,&bitmap->any_set,chunk_idx);
}
}
}
mi_bitmap_forall_set_chunks_end();
return false;
}

View file

@@ -1,94 +0,0 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_XBITMAP_H
#define MI_XBITMAP_H
/* --------------------------------------------------------------------------------
Definitions
-------------------------------------------------------------------------------- */
typedef size_t mi_bfield_t;
#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3)
#define MI_BFIELD_BITS (1 << MI_BFIELD_BITS_SHIFT)
#define MI_BFIELD_SIZE (MI_BFIELD_BITS/8)
#define MI_BFIELD_BITS_MOD_MASK (MI_BFIELD_BITS - 1)
#define MI_BFIELD_LO_BIT8 ((~((mi_bfield_t)0))/0xFF) // 0x01010101 ..
#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 ..
#define MI_BITMAP_CHUNK_BITS_SHIFT (8) // 2^8 = 256 bits per chunk
#define MI_BITMAP_CHUNK_BITS (1 << MI_BITMAP_CHUNK_BITS_SHIFT)
#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1)
typedef mi_decl_align(32) struct mi_bitmap_chunk_s {
_Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
} mi_bitmap_chunk_t;
typedef mi_decl_align(32) struct mi_bitmap_s {
mi_bitmap_chunk_t chunks[MI_BFIELD_BITS];
_Atomic(mi_bfield_t) any_set;
} mi_bitmap_t;
#define MI_BITMAP_MAX_BITS (MI_BFIELD_BITS * MI_BITMAP_CHUNK_BITS) // 16k bits on 64bit, 8k bits on 32bit
/* --------------------------------------------------------------------------------
Bitmap
-------------------------------------------------------------------------------- */
typedef bool mi_bit_t;
#define MI_BIT_SET (true)
#define MI_BIT_CLEAR (false)
// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero);
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
void mi_bitmap_unsafe_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
// If `already_xset` is not NULL, it is set to true if all the bits were already all set/cleared.
bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, bool* already_xset);
// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
// and false otherwise leaving the bitmask as is.
mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is.
mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
// and false otherwise leaving the bitmask as is.
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
// Find a set bit in a bitmap and atomically unset it. Returns true on success,
// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
// `start` is taken modulo `MI_BFIELD_BITS` to give the chunk where the search starts
// (to reduce thread contention).
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t* pidx, size_t start);
// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t start, size_t* pidx );
// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t start, size_t n, size_t* pidx );
#endif // MI_XBITMAP_H