mirror of https://github.com/microsoft/mimalloc.git

commit 4c81c3cf90 (parent 42af184ce9)

enable purging of free committed slices from arenas

7 changed files with 222 additions and 56 deletions
@@ -321,7 +321,7 @@ typedef struct mi_page_s {
 // The max object size are checked to not waste more than 12.5% internally over the page sizes.
 // (Except for large pages since huge objects are allocated in 4MiB chunks)
-#define MI_SMALL_MAX_OBJ_SIZE   ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8)   // < 8 KiB
+#define MI_SMALL_MAX_OBJ_SIZE   ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/4)   // < 8 KiB
 #define MI_MEDIUM_MAX_OBJ_SIZE  ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/4)  // < 128 KiB
 #define MI_LARGE_MAX_OBJ_SIZE   ((MI_LARGE_PAGE_SIZE-MI_PAGE_INFO_SIZE)/2)   // < 2 MiB
 #define MI_LARGE_MAX_OBJ_WSIZE  (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE)

src/arena.c (162 changes)

@@ -42,13 +42,13 @@ typedef struct mi_arena_s {
   int                 numa_node;         // associated NUMA node
   bool                is_exclusive;      // only allow allocations if specifically for this arena
   bool                is_large;          // memory area consists of large- or huge OS pages (always committed)
-  _Atomic(mi_msecs_t) purge_expire;      // expiration time when pages can be purged from `pages_purge`.
+  _Atomic(mi_msecs_t) purge_expire;      // expiration time when slices can be purged from `slices_purge`.

   mi_bitmap_t*        slices_free;       // is the slice free?
   mi_bitmap_t*        slices_committed;  // is the slice committed? (i.e. accessible)
   mi_bitmap_t*        slices_dirty;      // is the slice potentially non-zero?
+  mi_bitmap_t*        slices_purge;      // slices that can be purged
   mi_bitmap_t*        pages;             // all registered pages (abandoned and owned)
-  mi_bitmap_t*        pages_purge;       // pages that are scheduled to be purged
   mi_bitmap_t*        pages_abandoned[MI_BIN_COUNT];  // abandoned pages per size bin (a set bit means the start of the page)
   // the full queue contains abandoned full pages
   // followed by the bitmaps (whose sizes depend on the arena size)
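
Note: purge tracking moves from page granularity (`pages_purge`) to slice granularity (`slices_purge`), so any freed range of slices can be scheduled for purging, not just whole pages. A toy model of the parallel per-slice bitmaps (illustrative only -- the real `mi_bitmap_t` is a chunked atomic bitmap, and all `toy_` names here are made up):

    #include <stdbool.h>
    #include <stdint.h>

    // One bit per slice in each map; a slice is purgeable when it is free,
    // still committed, and marked in `slices_purge`.
    typedef struct toy_arena_s {
      uint64_t slices_free;       // is the slice free?
      uint64_t slices_committed;  // is the slice committed (accessible)?
      uint64_t slices_purge;      // is the slice scheduled to be purged?
    } toy_arena_t;

    static bool toy_can_purge(const toy_arena_t* arena, unsigned slice_index) {
      const uint64_t bit = (uint64_t)1 << slice_index;
      return (arena->slices_free & bit) != 0
          && (arena->slices_committed & bit) != 0
          && (arena->slices_purge & bit) != 0;
    }
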
@@ -57,8 +57,8 @@ typedef struct mi_arena_s {
 // Every "page" in `pages_purge` points to purge info
 // (since we use it for any free'd range and not just for pages)
 typedef struct mi_purge_info_s {
-  mi_msecs_t expire;
-  size_t     slice_count;
+  _Atomic(mi_msecs_t) expire;
+  _Atomic(size_t)     slice_count;
 } mi_purge_info_t;

@@ -1123,8 +1123,8 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   arena->slices_free = mi_arena_bitmap_init(slice_count,&base);
   arena->slices_committed = mi_arena_bitmap_init(slice_count,&base);
   arena->slices_dirty = mi_arena_bitmap_init(slice_count,&base);
+  arena->slices_purge = mi_arena_bitmap_init(slice_count, &base);
   arena->pages = mi_arena_bitmap_init(slice_count, &base);
-  arena->pages_purge = mi_arena_bitmap_init(slice_count, &base);
   for( size_t i = 0; i < MI_ARENA_BIN_COUNT; i++) {
     arena->pages_abandoned[i] = mi_arena_bitmap_init(slice_count,&base);
   }
@@ -1224,16 +1224,12 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, mi_arena_t
     else if (_mi_meta_is_meta_page(start)) { c = 'm'; }
     else if (slice_index + bit < arena->info_slices) { c = 'i'; }
     // else if (mi_bitmap_is_setN(arena->pages_purge, slice_index + bit, NULL)) { c = '*'; }
-    else if (mi_bitmap_is_setN(arena->slices_free, slice_index+bit, 1)) {
-      if (mi_bitmap_is_setN(arena->pages_purge, slice_index + bit, 1)) {
-        mi_assert_internal(bit_of_page <= 0);
-        mi_purge_info_t* pinfo = (mi_purge_info_t*)start;
-        c = '!';
-        bit_of_page = (long)pinfo->slice_count;
-      }
-      if (mi_bitmap_is_setN(arena->slices_committed, slice_index + bit, 1)) { c = '_'; }
+    else if (mi_bitmap_is_set(arena->slices_free, slice_index+bit)) {
+      if (mi_bitmap_is_set(arena->slices_purge, slice_index + bit)) { c = '!'; }
+      else if (mi_bitmap_is_setN(arena->slices_committed, slice_index + bit, 1)) { c = '_'; }
       else { c = '.'; }
     }
+    if (bit==MI_BFIELD_BITS-1 && bit_of_page > 1) { c = '>'; }
     buf[bit] = c;
   }
 }
@@ -1390,53 +1386,121 @@ static long mi_arena_purge_delay(void) {
   return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult));
 }

-// reset or decommit in an arena and update the committed/decommit bitmaps
+// reset or decommit in an arena and update the commit bitmap
 // assumes we own the area (i.e. slices_free is claimed by us)
-static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slices) {
+static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slice_count) {
   mi_assert_internal(!arena->memid.is_pinned);
-  const size_t size = mi_size_of_slices(slices);
+  mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));

+  const size_t size = mi_size_of_slices(slice_count);
   void* const p = mi_arena_slice_start(arena, slice_index);
-  bool needs_recommit;
-  if (mi_bitmap_is_setN(arena->slices_committed, slice_index, slices)) {
-    // all slices are committed, we can purge freely
+  bool needs_recommit = false;  // reset needs no recommit, decommit does need it
+  if (mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)) {
+    // all slices are committed, we can purge the entire range
     needs_recommit = _mi_os_purge(p, size);
   }
   else {
-    // some slices are not committed -- this can happen when a partially committed slice is freed
-    // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge
-    // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
-    // and also undo the decommit stats (as it was already adjusted)
-    mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
-    needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */);
-    if (needs_recommit) { _mi_stat_increase(&_mi_stats_main.committed, size); }
+    mi_assert_internal(false); // ?
   }

-  // clear the purged slices
-  mi_bitmap_clearN(arena->slices_purge, slices, slice_index);

   // update committed bitmap
   if (needs_recommit) {
-    mi_bitmap_clearN(arena->slices_committed, slices, slice_index);
+    mi_bitmap_clearN(arena->slices_committed, slice_index, slice_count);
   }
 }
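
The `needs_recommit` result encodes which flavor of purge the OS layer performed: a reset keeps the range accessible (no recommit needed), while a decommit makes it inaccessible until recommitted, which is why `slices_committed` is only cleared in that case. A rough POSIX sketch of the distinction (an assumption about what `_mi_os_purge` does underneath, not its actual implementation):

    #include <sys/mman.h>
    #include <stdbool.h>
    #include <stddef.h>

    // Returns true when the caller must recommit before reusing the memory.
    static bool toy_os_purge(void* p, size_t size, bool decommit) {
      if (decommit) {
        mprotect(p, size, PROT_NONE);  // decommit: needs recommit before reuse
        return true;
      }
    #if defined(MADV_FREE)
      madvise(p, size, MADV_FREE);     // reset: reclaimed lazily, still accessible
    #else
      madvise(p, size, MADV_DONTNEED);
    #endif
      return false;
    }
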

 // Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls.
 // Note: assumes we (still) own the area as we may purge immediately
-static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slices) {
+static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slice_count) {
   const long delay = mi_arena_purge_delay();
   if (delay < 0 || _mi_preloading()) return;  // is purging allowed at all?

+  mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
   if (delay == 0) {
-    // decommit directly
-    mi_arena_purge(arena, slice_index, slices);
+    // purge directly
+    mi_arena_purge(arena, slice_index, slice_count);
   }
   else {
-    // schedule decommit
-    _mi_error_message(EFAULT, "purging not yet implemented\n");
+    // schedule purge
+    mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
+    if (expire == 0) {
+      mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay);
+    }
+    //else {
+    //  mi_atomic_addi64_acq_rel(&arena->purge_expire, (mi_msecs_t)(delay/10)); // add smallish extra delay
+    //}
+    mi_bitmap_setN(arena->slices_purge, slice_index, slice_count, NULL);
   }
 }
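
The delay returned by `mi_arena_purge_delay()` drives three behaviors: negative disables purging, zero purges immediately, and positive batches purges until `purge_expire` passes. From the public API this can be tuned per process; a hypothetical usage sketch:

    #include <mimalloc.h>

    int main(void) {
      mi_option_set(mi_option_purge_delay, 250);  // batch purges ~250ms after a free
      void* p = mi_malloc(8 * 1024 * 1024);
      mi_free(p);                  // the freed slices are marked in `slices_purge`
      mi_collect(true /*force*/);  // a forced collect purges expired slices now
      return 0;
    }
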

+typedef struct mi_purge_visit_info_s {
+  mi_msecs_t now;
+  mi_msecs_t delay;
+  bool all_purged;
+  bool any_purged;
+} mi_purge_visit_info_t;
+
+static bool mi_arena_try_purge_range(mi_arena_t* arena, size_t slice_index, size_t slice_count) {
+  if (mi_bitmap_try_clearN(arena->slices_free, slice_index, slice_count)) {
+    // purge
+    mi_arena_purge(arena, slice_index, slice_count);
+    // and reset the free range
+    mi_bitmap_setN(arena->slices_free, slice_index, slice_count, NULL);
+    return true;
+  }
+  else {
+    return false;
+  }
+}
+
+static bool mi_arena_try_purge_visitor(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg) {
+  mi_purge_visit_info_t* vinfo = (mi_purge_visit_info_t*)arg;
+  // try to purge: first claim the free blocks
+  if (mi_arena_try_purge_range(arena, slice_index, slice_count)) {
+    vinfo->any_purged = true;
+  }
+  else {
+    // failed to claim the full range, try per slice instead
+    for (size_t i = 0; i < slice_count; i++) {
+      vinfo->any_purged = vinfo->any_purged || mi_arena_try_purge_range(arena, slice_index + i, 1);
+    }
+  }
+  // done: clear the purge bits
+  mi_bitmap_clearN(arena->slices_purge, slice_index, slice_count);
+  return true; // continue
+}
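
The key race here is against concurrent allocation: `mi_arena_try_purge_range` first claims the slices by clearing their `slices_free` bits, so an allocator thread (which claims bits the same way) can never be handed memory that is mid-decommit; the bits are set again once the purge completes. A minimal model of that claim/release discipline on a single bitmap word (toy code, not the real chunked bitmap):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool toy_try_claim_purge_release(_Atomic(uint64_t)* slices_free,
                                            uint64_t mask) {
      uint64_t expected = atomic_load_explicit(slices_free, memory_order_relaxed);
      do {
        if ((expected & mask) != mask) return false;  // some slice already claimed
      } while (!atomic_compare_exchange_weak_explicit(
                  slices_free, &expected, expected & ~mask,
                  memory_order_acq_rel, memory_order_relaxed));
      // ... the range is now exclusively ours: purge it here ...
      atomic_fetch_or_explicit(slices_free, mask, memory_order_release);  // free again
      return true;
    }
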
+
+// returns true if anything was purged
+static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force)
+{
+  // check pre-conditions
+  if (arena->memid.is_pinned) return false;
+  mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
+  if (expire == 0) return false;
+
+  // expired yet?
+  if (!force && expire > now) return false;
+
+  // reset expire (if not already set concurrently)
+  mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, (mi_msecs_t)0);
+
+  // go through all purge info's
+  // todo: instead of visiting per-bit, we should visit per range of bits
+  mi_purge_visit_info_t vinfo = { now, mi_arena_purge_delay(), true /*all?*/, false /*any?*/ };
+  _mi_bitmap_forall_set(arena->slices_purge, &mi_arena_try_purge_visitor, arena, &vinfo);
+
+  // if not fully purged, make sure to purge again in the future
+  if (!vinfo.all_purged) {
+    const long delay = mi_arena_purge_delay();
+    mi_msecs_t expected = 0;
+    mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expected, _mi_clock_now() + delay);
+  }
+  return vinfo.any_purged;
+}
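
Scheduling and purging hand off through a single atomic deadline: schedulers store an expiration time into `purge_expire`, and the purger swaps it back to zero before walking the bitmap (if the swap loses a race, the freshly stored deadline simply triggers the next round). A standalone C11 model of that handshake (names are illustrative):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef int64_t toy_msecs_t;

    static bool toy_claim_expired(_Atomic(toy_msecs_t)* purge_expire,
                                  toy_msecs_t now, bool force) {
      toy_msecs_t expire = atomic_load_explicit(purge_expire, memory_order_relaxed);
      if (expire == 0) return false;             // nothing scheduled
      if (!force && expire > now) return false;  // not expired yet
      // reset the deadline unless another thread changed it concurrently
      atomic_compare_exchange_strong_explicit(purge_expire, &expire, 0,
                                              memory_order_acq_rel,
                                              memory_order_relaxed);
      return true;                               // proceed with the purge walk
    }
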

 static void mi_arenas_try_purge(bool force, bool visit_all) {
   if (_mi_preloading() || mi_arena_purge_delay() <= 0) return;  // nothing will be scheduled
@@ -1444,12 +1508,27 @@ static void mi_arenas_try_purge(bool force, bool visit_all) {
   const size_t max_arena = mi_arena_get_count();
   if (max_arena == 0) return;

-  // _mi_error_message(EFAULT, "purging not yet implemented\n");
-  MI_UNUSED(visit_all);
-  MI_UNUSED(force);
+  // allow only one thread to purge at a time
+  static mi_atomic_guard_t purge_guard;
+  mi_atomic_guard(&purge_guard)
+  {
+    const mi_msecs_t now = _mi_clock_now();
+    const size_t arena_start = _mi_tld()->tseq % max_arena;
+    size_t max_purge_count = (visit_all ? max_arena : 1);
+    for (size_t _i = 0; _i < max_arena; _i++) {
+      size_t i = _i + arena_start;
+      if (i >= max_arena) { i -= max_arena; }
+      mi_arena_t* arena = mi_arena_from_index(i);
+      if (arena != NULL) {
+        if (mi_arena_try_purge(arena, now, force)) {
+          if (max_purge_count <= 1) break;
+          max_purge_count--;
+        }
+      }
+    }
+  }
 }
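
Each thread starts its scan at `tseq % max_arena` so concurrent purgers spread over different arenas instead of all contending on arena 0, and `mi_atomic_guard` lets only one thread purge at a time. The wrap-around iteration on its own (a trivial standalone sketch):

    #include <stddef.h>

    static void toy_visit_round_robin(size_t start, size_t count,
                                      void (*visit)(size_t arena_index)) {
      for (size_t k = 0; k < count; k++) {
        size_t i = start + k;
        if (i >= count) { i -= count; }  // wrap instead of a `%` per iteration
        visit(i);
      }
    }
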

 bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
   MI_UNUSED(subproc_id); MI_UNUSED(heap_tag); MI_UNUSED(visit_blocks); MI_UNUSED(visitor); MI_UNUSED(arg);
   _mi_error_message(EINVAL, "implement mi_abandoned_visit_blocks\n");

@@ -1460,8 +1539,9 @@ bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg)
 /* -----------------------------------------------------------
   Unloading and reloading an arena.
 ----------------------------------------------------------- */
-static bool mi_arena_page_register(size_t slice_index, mi_arena_t* arena, void* arg) {
-  MI_UNUSED(arg);
+static bool mi_arena_page_register(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg) {
+  MI_UNUSED(arg); MI_UNUSED(slice_count);
+  mi_assert_internal(slice_count == 1);
   mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
   mi_assert_internal(mi_bitmap_is_setN(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index, 1));
   _mi_page_map_register(page);

src/bitmap.c (59 changes)

@@ -1051,6 +1051,23 @@ bool mi_bitmap_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, s
   return mi_bitmap_xsetN_(set, bitmap, idx, n, already_xset);
 }

+// ------- mi_bitmap_try_clearN ---------------------------------------
+
+bool mi_bitmap_try_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
+  mi_assert_internal(n>0);
+  mi_assert_internal(n<=MI_BCHUNK_BITS);
+  mi_assert_internal(idx + n <= mi_bitmap_max_bits(bitmap));
+
+  const size_t chunk_idx = idx / MI_BCHUNK_BITS;
+  const size_t cidx = idx % MI_BCHUNK_BITS;
+  mi_assert_internal(cidx + n <= MI_BCHUNK_BITS);  // don't cross chunks (for now)
+  mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
+  if (cidx + n > MI_BCHUNK_BITS) return false;
+  bool maybe_all_clear;
+  const bool cleared = mi_bchunk_try_clearN(&bitmap->chunks[chunk_idx], cidx, n, &maybe_all_clear);
+  if (cleared && maybe_all_clear) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); }
+  return cleared;
+}
+
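
`mi_bitmap_try_clearN` is the non-blocking building block for the purge claim: it either atomically clears all `n` bits (when they were all set) or leaves the bitmap untouched and returns false, and it deliberately does not cross chunk boundaries yet. A hedged usage sketch of the claim/restore pattern built on it (assuming a bitmap where the range is currently set):

    // claim: either we get the whole range or nothing changes
    if (mi_bitmap_try_clearN(bitmap, idx, n)) {
      // ... exclusively operate on the range [idx, idx+n) ...
      mi_bitmap_setN(bitmap, idx, n, NULL);  // restore the bits when done
    }
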
 // ------- mi_bitmap_is_xset ---------------------------------------

@@ -1071,6 +1088,7 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n

 /* --------------------------------------------------------------------------------
   Iterate through a bfield
 -------------------------------------------------------------------------------- */

@@ -1144,7 +1162,7 @@ static inline bool mi_bitmap_find(mi_bitmap_t* bitmap, size_t tseq, size_t n, si
   // and for each chunkmap entry we iterate over its bits to find the chunks
   mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[cmap_idx]);
   size_t cmap_entry_cycle = (cmap_idx != cmap_acc ? MI_BFIELD_BITS : cmap_acc_bits);
-  mi_bfield_cycle_iterate(cmap_entry, tseq, cmap_entry_cycle, eidx, Y)
+  mi_bfield_cycle_iterate(cmap_entry, tseq%8, cmap_entry_cycle, eidx, Y)  // reduce the tseq to 8 bins to reduce using extra memory (see `mstress`)
   {
     mi_assert_internal(eidx <= MI_BFIELD_BITS);
     const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx;
@@ -1314,10 +1332,47 @@ bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_a
         size_t bidx;
         while (mi_bfield_foreach_bit(&b, &bidx)) {
           const size_t idx = base_idx + bidx;
-          if (!visit(idx, arena, arg)) return false;
+          if (!visit(idx, 1, arena, arg)) return false;
         }
       }
     }
   }
   return true;
 }

+// Visit all set bits in a bitmap but try to return ranges (within bfields) if possible.
+// used by purging to purge larger ranges if possible
+// todo: optimize further? maybe use avx512 to directly get all indices using a mask_compressstore?
+bool _mi_bitmap_forall_set_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg) {
+  // for all chunkmap entries
+  const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS);
+  for (size_t i = 0; i < chunkmap_max; i++) {
+    mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]);
+    size_t cmap_idx;
+    // for each chunk (corresponding to a set bit in a chunkmap entry)
+    while (mi_bfield_foreach_bit(&cmap_entry, &cmap_idx)) {
+      const size_t chunk_idx = i*MI_BFIELD_BITS + cmap_idx;
+      // for each chunk field
+      mi_bchunk_t* const chunk = &bitmap->chunks[chunk_idx];
+      for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) {
+        const size_t base_idx = (chunk_idx*MI_BCHUNK_BITS) + (j*MI_BFIELD_BITS);
+        mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[j]);
+        size_t bshift = 0;
+        size_t bidx;
+        while (mi_bfield_find_least_bit(b, &bidx)) {
+          b >>= bidx;
+          bshift += bidx;
+          const size_t rng = mi_ctz(~b);  // all the set bits from bidx
+          mi_assert_internal(rng>=1);
+          const size_t idx = base_idx + bshift + bidx;
+          if (!visit(idx, rng, arena, arg)) return false;
+          // skip rng
+          b >>= rng;
+          bshift += rng;
+        }
+      }
+    }
+  }
+  return true;
+}
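
The range extraction relies on a small bit trick: after shifting the field so its lowest set bit lands at position 0, `ctz(~b)` counts the length of the contiguous run of set bits, so the visitor receives whole ranges instead of single bits. A standalone demonstration (gcc/clang builtins assumed; `b` must not be all ones, or `~b` would be zero):

    #include <stdio.h>
    #include <stdint.h>

    int main(void) {
      uint64_t b = 0xF6;   // set bits: a run at 1..2 and a run at 4..7
      size_t bshift = 0;   // how far `b` has been shifted so far
      while (b != 0) {
        const size_t bidx = (size_t)__builtin_ctzll(b);  // lowest set bit
        b >>= bidx;
        bshift += bidx;
        const size_t rng = (size_t)__builtin_ctzll(~b);  // run length from bit 0
        printf("run of %zu set bits starting at index %zu\n", rng, bshift);
        b >>= rng;         // skip the run and continue
        bshift += rng;
      }
      return 0;
    }
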

src/bitmap.h (23 changes)

@@ -171,6 +171,22 @@ static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n
   return mi_bitmap_is_xsetN(MI_BIT_CLEAR, bitmap, idx, n);
 }

+static inline bool mi_bitmap_is_set(mi_bitmap_t* bitmap, size_t idx) {
+  return mi_bitmap_is_setN(bitmap, idx, 1);
+}
+
+static inline bool mi_bitmap_is_clear(mi_bitmap_t* bitmap, size_t idx) {
+  return mi_bitmap_is_clearN(bitmap, idx, 1);
+}
+
+
+bool mi_bitmap_try_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n);
+
+static inline bool mi_bitmap_try_clear(mi_bitmap_t* bitmap, size_t idx) {
+  return mi_bitmap_try_clearN(bitmap, idx, 1);
+}
+
+
 // Specialized versions for common bit sequence sizes
 mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx);  // 1-bit
@@ -212,9 +228,12 @@ void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx);
 bool mi_bitmap_bsr(mi_bitmap_t* bitmap, size_t* idx);


-typedef bool (mi_forall_set_fun_t)(size_t slice_index, mi_arena_t* arena, void* arg2);
+typedef bool (mi_forall_set_fun_t)(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg2);

-// Visit all set bits in a bitmap
+// Visit all set bits in a bitmap (`slice_count == 1`)
 bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);

+// Visit all set bits in a bitmap with larger ranges if possible (`slice_count >= 1`)
+bool _mi_bitmap_forall_set_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
+
 #endif // MI_BITMAP_H

@@ -79,8 +79,12 @@ typedef struct mi_option_desc_s {
 #endif

 #ifndef MI_DEFAULT_ALLOW_LARGE_OS_PAGES
+#if defined(__linux__) && !defined(__ANDROID__)
+#define MI_DEFAULT_ALLOW_LARGE_OS_PAGES  1
+#else
 #define MI_DEFAULT_ALLOW_LARGE_OS_PAGES  0
 #endif
+#endif

 #ifndef MI_DEFAULT_RESERVE_HUGE_OS_PAGES
 #define MI_DEFAULT_RESERVE_HUGE_OS_PAGES  0
@@ -132,7 +136,7 @@ static mi_option_desc_t options[_mi_option_last] =
 #else
   { 1,    UNINIT, MI_OPTION(eager_commit_delay) },  // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
 #endif
-  { -1,   UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) },  // purge delay in milli-seconds
+  { 1000, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) },  // purge delay in milli-seconds
   { 0,    UNINIT, MI_OPTION(use_numa_nodes) },  // 0 = use available numa nodes, otherwise use at most N nodes.
   { 0,    UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) },  // 1 = do not use OS memory for allocation (but only reserved arenas)
   { 100,  UNINIT, MI_OPTION(os_tag) },  // only apple specific for now but might serve more or less related purpose

@@ -141,7 +145,7 @@ static mi_option_desc_t options[_mi_option_last] =
   { 10,   UNINIT, MI_OPTION(max_segment_reclaim)},  // max. percentage of the abandoned segments to be reclaimed per try.
   { 0,    UNINIT, MI_OPTION(destroy_on_exit)},  // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
   { MI_DEFAULT_ARENA_RESERVE, UNINIT, MI_OPTION(arena_reserve) },  // reserve memory N KiB at a time (=1GiB) (use `option_get_size`)
-  { 10,   UNINIT, MI_OPTION(arena_purge_mult) },  // purge delay multiplier for arena's
+  { 1,    UNINIT, MI_OPTION(arena_purge_mult) },  // purge delay multiplier for arena's
   { 1,    UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
   { MI_DEFAULT_DISALLOW_ARENA_ALLOC, UNINIT, MI_OPTION(disallow_arena_alloc) },  // 1 = do not use arena's for allocation (except if using specific arena id's)
   { 400,  UNINIT, MI_OPTION(retry_on_oom) },  // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries.
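
With these defaults the purge machinery is effectively switched on: `purge_delay` goes from -1 (disabled) to 1000ms, and the arena multiplier drops from 10x to 1x, so `mi_arena_purge_delay()` yields about one second. Both knobs are regular mimalloc options, so they can be tuned from code (or via the corresponding `MIMALLOC_` environment variables); a small usage sketch:

    #include <mimalloc.h>

    int main(void) {
      // mi_arena_purge_delay() multiplies these two values.
      mi_option_set(mi_option_purge_delay, 100);     // purge ~100ms after free
      mi_option_set(mi_option_arena_purge_mult, 2);  // arenas use 200ms instead
      void* p = mi_malloc(1u << 20);
      mi_free(p);
      return 0;
    }
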
@@ -192,7 +196,7 @@ void _mi_options_init(void) {
     }
   }
   _mi_verbose_message("guarded build: %s\n", mi_option_get(mi_option_guarded_sample_rate) != 0 ? "enabled" : "disabled");
 #endif
 }

 long _mi_option_get_fast(mi_option_t option) {
@@ -61,6 +61,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <sys/syscall.h>
 #endif

+#define MI_UNIX_LARGE_PAGE_SIZE  (2*MI_MiB)  // TODO: can we query the OS for this?

 //------------------------------------------------------------------------------------
 // Use syscalls for some primitives to allow for libraries that override open/read/close etc.
@@ -146,7 +147,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
   }
 #endif
 }
-  config->large_page_size = 2*MI_MiB;  // TODO: can we query the OS for this?
+  config->large_page_size = MI_UNIX_LARGE_PAGE_SIZE;
   config->has_overcommit = unix_detect_overcommit();
   config->has_partial_free = true;  // mmap can free in parts
   config->has_virtual_reserve = true;  // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE)
@@ -361,6 +362,9 @@ int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit,
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   mi_assert_internal(commit || !allow_large);
   mi_assert_internal(try_alignment > 0);
+  if (hint_addr == NULL && size >= 8*MI_UNIX_LARGE_PAGE_SIZE && try_alignment > 1 && _mi_is_power_of_two(try_alignment) && try_alignment < MI_UNIX_LARGE_PAGE_SIZE) {
+    try_alignment = MI_UNIX_LARGE_PAGE_SIZE;  // try to align along large page size for larger allocations
+  }
+
   *is_zero = true;
   int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
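
The new heuristic nudges large mmap requests onto a 2 MiB boundary so the kernel can back them with (transparent) huge pages; it only fires when the caller did not pass an address hint and asked for a smaller power-of-two alignment. The same logic as a standalone helper (toy names, mirroring the condition above):

    #include <stdbool.h>
    #include <stddef.h>

    #define TOY_LARGE_PAGE_SIZE  ((size_t)(2*1024*1024))  // 2 MiB

    static bool toy_is_power_of_two(size_t x) { return x != 0 && (x & (x-1)) == 0; }

    static size_t toy_choose_alignment(void* hint_addr, size_t size, size_t try_alignment) {
      if (hint_addr == NULL && size >= 8*TOY_LARGE_PAGE_SIZE &&
          try_alignment > 1 && toy_is_power_of_two(try_alignment) &&
          try_alignment < TOY_LARGE_PAGE_SIZE) {
        return TOY_LARGE_PAGE_SIZE;  // align big allocations to the huge-page size
      }
      return try_alignment;
    }
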

@@ -40,10 +40,10 @@ static int ITER = 20;
 static int THREADS = 8;
 static int SCALE = 10;
 static int ITER = 10;
-#elif 1
+#elif 0
 static int THREADS = 4;
 static int SCALE = 10;
-static int ITER = 10;
+static int ITER = 20;
 #define ALLOW_LARGE false
 #elif 0
 static int THREADS = 32;
@@ -260,8 +260,12 @@ static void test_stress(void) {
   //mi_debug_show_arenas();
 #endif
 #if !defined(NDEBUG) || defined(MI_TSAN)
-  if ((n + 1) % 10 == 0)
-  { printf("- iterations left: %3d\n", ITER - (n + 1)); }
+  if ((n + 1) % 10 == 0) {
+    printf("- iterations left: %3d\n", ITER - (n + 1));
+    //mi_debug_show_arenas(true, false, false);
+    //mi_collect(true);
+    //mi_debug_show_arenas(true, false, false);
+  }
 #endif
 }
 // clean up
@@ -344,8 +348,8 @@ int main(int argc, char** argv) {
 #ifndef USE_STD_MALLOC
 #ifndef NDEBUG
   //mi_debug_show_arenas(true, true, false);
-  // mi_debug_show_arenas(true, false, false);
-  // mi_collect(true);
+  mi_debug_show_arenas(true, false, false);
+  mi_collect(true);
   mi_debug_show_arenas(true,false,false);
 #else
   mi_collect(false);