Merge branch 'dev3' into dev3-bin

This commit is contained in:
daanx 2024-12-17 18:10:37 -08:00
commit 587eabe72b
9 changed files with 87 additions and 48 deletions

View file

@ -43,7 +43,7 @@ jobs:
solution: $(BuildType)/libmimalloc.sln solution: $(BuildType)/libmimalloc.sln
configuration: '$(MSBuildConfiguration)' configuration: '$(MSBuildConfiguration)'
msbuildArguments: -m msbuildArguments: -m
- script: ctest --verbose --timeout 180 -C $(MSBuildConfiguration) - script: ctest --verbose --timeout 240 -C $(MSBuildConfiguration)
workingDirectory: $(BuildType) workingDirectory: $(BuildType)
displayName: CTest displayName: CTest
#- script: $(BuildType)\$(BuildType)\mimalloc-test-stress #- script: $(BuildType)\$(BuildType)\mimalloc-test-stress
@ -126,7 +126,7 @@ jobs:
cmakeArgs: .. $(cmakeExtraArgs) cmakeArgs: .. $(cmakeExtraArgs)
- script: make -j$(nproc) -C $(BuildType) - script: make -j$(nproc) -C $(BuildType)
displayName: Make displayName: Make
- script: ctest --verbose --timeout 180 - script: ctest --verbose --timeout 240
workingDirectory: $(BuildType) workingDirectory: $(BuildType)
displayName: CTest displayName: CTest
env: env:
@ -157,7 +157,7 @@ jobs:
cmakeArgs: .. $(cmakeExtraArgs) cmakeArgs: .. $(cmakeExtraArgs)
- script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType)
displayName: Make displayName: Make
- script: ctest --verbose --timeout 180 - script: ctest --verbose --timeout 240
workingDirectory: $(BuildType) workingDirectory: $(BuildType)
displayName: CTest displayName: CTest
# - upload: $(Build.SourcesDirectory)/$(BuildType) # - upload: $(Build.SourcesDirectory)/$(BuildType)
@ -192,7 +192,7 @@ jobs:
solution: $(BuildType)/libmimalloc.sln solution: $(BuildType)/libmimalloc.sln
configuration: '$(MSBuildConfiguration)' configuration: '$(MSBuildConfiguration)'
msbuildArguments: -m msbuildArguments: -m
- script: ctest --verbose --timeout 180 -C $(MSBuildConfiguration) - script: ctest --verbose --timeout 240 -C $(MSBuildConfiguration)
workingDirectory: $(BuildType) workingDirectory: $(BuildType)
displayName: CTest displayName: CTest
@ -235,7 +235,7 @@ jobs:
cmakeArgs: .. $(cmakeExtraArgs) cmakeArgs: .. $(cmakeExtraArgs)
- script: make -j$(nproc) -C $(BuildType) - script: make -j$(nproc) -C $(BuildType)
displayName: Make displayName: Make
- script: ctest --verbose --timeout 180 - script: ctest --verbose --timeout 240
workingDirectory: $(BuildType) workingDirectory: $(BuildType)
displayName: CTest displayName: CTest
@ -278,7 +278,7 @@ jobs:
cmakeArgs: .. $(cmakeExtraArgs) cmakeArgs: .. $(cmakeExtraArgs)
- script: make -j$(nproc) -C $(BuildType) - script: make -j$(nproc) -C $(BuildType)
displayName: Make displayName: Make
- script: ctest --verbose --timeout 180 - script: ctest --verbose --timeout 240
workingDirectory: $(BuildType) workingDirectory: $(BuildType)
displayName: CTest displayName: CTest
@ -302,7 +302,7 @@ jobs:
cmakeArgs: .. $(cmakeExtraArgs) cmakeArgs: .. $(cmakeExtraArgs)
- script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType)
displayName: Make displayName: Make
- script: ctest --verbose --timeout 180 - script: ctest --verbose --timeout 240
workingDirectory: $(BuildType) workingDirectory: $(BuildType)
displayName: CTest displayName: CTest

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -485,18 +485,27 @@ typedef struct mi_stats_s {
} mi_stats_t; } mi_stats_t;
// add to stat keeping track of the peak
void _mi_stat_increase(mi_stat_count_t* stat, size_t amount); void _mi_stat_increase(mi_stat_count_t* stat, size_t amount);
void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount); void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount);
// adjust stat in special cases to compensate for double counting
void _mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount);
void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount);
// counters can just be increased
void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
#if (MI_STAT) #if (MI_STAT)
#define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount) #define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount)
#define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount) #define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount)
#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount) #define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount)
#define mi_stat_adjust_increase(stat,amount) _mi_stat_adjust_increase( &(stat), amount)
#define mi_stat_adjust_decrease(stat,amount) _mi_stat_adjust_decrease( &(stat), amount)
#else #else
#define mi_stat_increase(stat,amount) (void)0 #define mi_stat_increase(stat,amount) ((void)0)
#define mi_stat_decrease(stat,amount) (void)0 #define mi_stat_decrease(stat,amount) ((void)0)
#define mi_stat_counter_increase(stat,amount) (void)0 #define mi_stat_counter_increase(stat,amount) ((void)0)
// no-op when statistics are disabled; the name must match the MI_STAT variant
#define mi_stat_adjust_increase(stat,amount) ((void)0)
#define mi_stat_adjust_decrease(stat,amount) ((void)0)
#endif #endif
#define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount)

View file

@ -231,12 +231,24 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
// commit requested, but the range may not be committed as a whole: ensure it is committed now // commit requested, but the range may not be committed as a whole: ensure it is committed now
if (!mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)) { if (!mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)) {
// not fully committed: commit the full range and set the commit bits // not fully committed: commit the full range and set the commit bits
// (this may race and we may double-commit which is fine) // we set the bits first since we own these slices (they are no longer free)
size_t already_committed_count = 0;
mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed_count);
// adjust the stats so we don't double count the commits
if (already_committed_count > 0) {
_mi_stat_adjust_decrease(&_mi_stats_main.committed, mi_size_of_slices(already_committed_count));
}
// now actually commit
bool commit_zero = false; bool commit_zero = false;
if (!_mi_os_commit(p, mi_size_of_slices(slice_count), &commit_zero)) { if (!_mi_os_commit(p, mi_size_of_slices(slice_count), &commit_zero)) {
// failed to commit (todo: give warning?)
if (already_committed_count > 0) {
_mi_stat_increase(&_mi_stats_main.committed, mi_size_of_slices(already_committed_count));
}
memid->initially_committed = false; memid->initially_committed = false;
} }
else { else {
// committed
if (commit_zero) { memid->initially_zero = true; } if (commit_zero) { memid->initially_zero = true; }
#if MI_DEBUG > 1 #if MI_DEBUG > 1
if (memid->initially_zero) { if (memid->initially_zero) {
@ -246,12 +258,6 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
} }
} }
#endif #endif
size_t already_committed_count = 0;
mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed_count);
if (already_committed_count < slice_count) {
// todo: also decrease total
mi_stat_decrease(_mi_stats_main.committed, mi_size_of_slices(already_committed_count));
}
} }
} }
if (memid->initially_zero) { if (memid->initially_zero) {
@ -795,7 +801,7 @@ void _mi_arena_page_free(mi_page_t* page) {
Arena abandon Arena abandon
----------------------------------------------------------- */ ----------------------------------------------------------- */
static void mi_arena_page_abandon_no_stat(mi_page_t* page) { void _mi_arena_page_abandon(mi_page_t* page) {
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page); mi_assert_internal(_mi_ptr_page(page)==page);
mi_assert_internal(mi_page_is_owned(page)); mi_assert_internal(mi_page_is_owned(page));
@ -824,12 +830,8 @@ static void mi_arena_page_abandon_no_stat(mi_page_t* page) {
// page is full (or a singleton), page is OS/externally allocated // page is full (or a singleton), page is OS/externally allocated
// leave as is; it will be reclaimed when an object is free'd in the page // leave as is; it will be reclaimed when an object is free'd in the page
} }
_mi_page_unown(page);
}
void _mi_arena_page_abandon(mi_page_t* page) {
mi_arena_page_abandon_no_stat(page);
_mi_stat_increase(&_mi_stats_main.pages_abandoned, 1); _mi_stat_increase(&_mi_stats_main.pages_abandoned, 1);
_mi_page_unown(page);
} }
bool _mi_arena_page_try_reabandon_to_mapped(mi_page_t* page) { bool _mi_arena_page_try_reabandon_to_mapped(mi_page_t* page) {
@ -846,7 +848,8 @@ bool _mi_arena_page_try_reabandon_to_mapped(mi_page_t* page) {
} }
else { else {
_mi_stat_counter_increase(&_mi_stats_main.pages_reabandon_full, 1); _mi_stat_counter_increase(&_mi_stats_main.pages_reabandon_full, 1);
mi_arena_page_abandon_no_stat(page); _mi_stat_adjust_decrease(&_mi_stats_main.pages_abandoned, 1); // adjust as we are not abandoning fresh
_mi_arena_page_abandon(page);
return true; return true;
} }
} }
@ -1416,19 +1419,21 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv
// reset or decommit in an arena and update the commit bitmap // reset or decommit in an arena and update the commit bitmap
// assumes we own the area (i.e. slices_free is claimed by us) // assumes we own the area (i.e. slices_free is claimed by us)
static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slice_count) { // returns if the memory is no longer committed (versus reset which keeps the commit)
static bool mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slice_count) {
mi_assert_internal(!arena->memid.is_pinned); mi_assert_internal(!arena->memid.is_pinned);
mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count)); mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count));
const size_t size = mi_size_of_slices(slice_count); const size_t size = mi_size_of_slices(slice_count);
void* const p = mi_arena_slice_start(arena, slice_index); void* const p = mi_arena_slice_start(arena, slice_index);
const bool all_committed = mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count); const bool all_committed = mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count);
const bool needs_recommit = _mi_os_purge_ex(p, size, all_committed); const bool needs_recommit = _mi_os_purge_ex(p, size, all_committed /* allow reset? */);
// update committed bitmap // update committed bitmap
if (needs_recommit) { if (needs_recommit) {
mi_bitmap_clearN(arena->slices_committed, slice_index, slice_count); mi_bitmap_clearN(arena->slices_committed, slice_index, slice_count);
} }
return needs_recommit;
} }
@ -1445,12 +1450,13 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_
} }
else { else {
// schedule purge // schedule purge
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); mi_msecs_t expire0 = 0;
if (expire == 0) { if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire0, _mi_clock_now() + delay)) {
mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay); // expiration was not yet set
mi_atomic_storei64_release(&arena->purge_expire_extend, 0); mi_atomic_storei64_release(&arena->purge_expire_extend, 0);
} }
else if (mi_atomic_loadi64_acquire(&arena->purge_expire_extend) < 10*delay) { // limit max extension time else if (mi_atomic_loadi64_acquire(&arena->purge_expire_extend) < 10*delay) { // limit max extension time
// already an expiration was set
mi_atomic_addi64_acq_rel(&arena->purge_expire_extend, (mi_msecs_t)(delay/10)); // add smallish extra delay mi_atomic_addi64_acq_rel(&arena->purge_expire_extend, (mi_msecs_t)(delay/10)); // add smallish extra delay
} }
mi_bitmap_setN(arena->slices_purge, slice_index, slice_count, NULL); mi_bitmap_setN(arena->slices_purge, slice_index, slice_count, NULL);
@ -1467,8 +1473,8 @@ typedef struct mi_purge_visit_info_s {
static bool mi_arena_try_purge_range(mi_arena_t* arena, size_t slice_index, size_t slice_count) { static bool mi_arena_try_purge_range(mi_arena_t* arena, size_t slice_index, size_t slice_count) {
if (mi_bbitmap_try_clearN(arena->slices_free, slice_index, slice_count)) { if (mi_bbitmap_try_clearN(arena->slices_free, slice_index, slice_count)) {
// purge // purge
mi_arena_purge(arena, slice_index, slice_count); bool decommitted = mi_arena_purge(arena, slice_index, slice_count); MI_UNUSED(decommitted);
mi_assert_internal(mi_bitmap_is_clearN(arena->slices_committed, slice_index, slice_count)); mi_assert_internal(!decommitted || mi_bitmap_is_clearN(arena->slices_committed, slice_index, slice_count));
// and reset the free range // and reset the free range
mi_bbitmap_setN(arena->slices_free, slice_index, slice_count); mi_bbitmap_setN(arena->slices_free, slice_index, slice_count);
return true; return true;
@ -1495,8 +1501,8 @@ static bool mi_arena_try_purge_visitor(size_t slice_index, size_t slice_count, m
vinfo->all_purged = vinfo->all_purged && purged; vinfo->all_purged = vinfo->all_purged && purged;
} }
} }
// done: clear the purge bits // don't clear the purge bits as that is done atomically by `_mi_bitmap_forall_setc_ranges`
mi_bitmap_clearN(arena->slices_purge, slice_index, slice_count); // mi_bitmap_clearN(arena->slices_purge, slice_index, slice_count);
return true; // continue return true; // continue
} }
@ -1520,17 +1526,11 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force)
_mi_stat_counter_increase(&_mi_stats_main.arena_purges, 1); _mi_stat_counter_increase(&_mi_stats_main.arena_purges, 1);
// go through all purge info's (with max MI_BFIELD_BITS ranges at a time) // go through all purge info's (with max MI_BFIELD_BITS ranges at a time)
// this also clears those ranges atomically (so any newly freed blocks will get purged next
// time around)
mi_purge_visit_info_t vinfo = { now, mi_arena_purge_delay(), true /*all?*/, false /*any?*/}; mi_purge_visit_info_t vinfo = { now, mi_arena_purge_delay(), true /*all?*/, false /*any?*/};
_mi_bitmap_forall_set_ranges(arena->slices_purge, &mi_arena_try_purge_visitor, arena, &vinfo); _mi_bitmap_forall_setc_ranges(arena->slices_purge, &mi_arena_try_purge_visitor, arena, &vinfo);
// if not fully purged, make sure to purge again in the future
if (!vinfo.all_purged) {
const long delay = mi_arena_purge_delay();
mi_msecs_t expected = 0;
if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expected, _mi_clock_now() + delay)) {
mi_atomic_storei64_release(&arena->purge_expire_extend, (mi_msecs_t)0);
}
}
return vinfo.any_purged; return vinfo.any_purged;
} }

View file

@ -1239,9 +1239,10 @@ bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_a
} }
// Visit all set bits in a bitmap but try to return ranges (within bfields) if possible. // Visit all set bits in a bitmap but try to return ranges (within bfields) if possible.
// used by purging to purge larger ranges if possible // Also clear those ranges atomically.
// Used by purging to purge larger ranges when possible
// todo: optimize further? maybe use avx512 to directly get all indices using a mask_compressstore? // todo: optimize further? maybe use avx512 to directly get all indices using a mask_compressstore?
bool _mi_bitmap_forall_set_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg) { bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg) {
// for all chunkmap entries // for all chunkmap entries
const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS); const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS);
for (size_t i = 0; i < chunkmap_max; i++) { for (size_t i = 0; i < chunkmap_max; i++) {
@ -1254,7 +1255,7 @@ bool _mi_bitmap_forall_set_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visi
mi_bchunk_t* const chunk = &bitmap->chunks[chunk_idx]; mi_bchunk_t* const chunk = &bitmap->chunks[chunk_idx];
for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) { for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) {
const size_t base_idx = (chunk_idx*MI_BCHUNK_BITS) + (j*MI_BFIELD_BITS); const size_t base_idx = (chunk_idx*MI_BCHUNK_BITS) + (j*MI_BFIELD_BITS);
mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[j]); mi_bfield_t b = mi_atomic_exchange_acq_rel(&chunk->bfields[j], 0); // can be relaxed?
#if MI_DEBUG > 1 #if MI_DEBUG > 1
const size_t bpopcount = mi_popcount(b); const size_t bpopcount = mi_popcount(b);
size_t rngcount = 0; size_t rngcount = 0;

View file

@ -204,7 +204,7 @@ typedef bool (mi_forall_set_fun_t)(size_t slice_index, size_t slice_count, mi_ar
bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg); bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
// Visit all set bits in a bitmap with larger ranges if possible (`slice_count >= 1`) // Visit all set bits in a bitmap with larger ranges if possible (`slice_count >= 1`)
bool _mi_bitmap_forall_set_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg); bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------

View file

@ -26,7 +26,7 @@ static bool mi_is_in_main(void* stat) {
static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
if (amount == 0) return; if (amount == 0) return;
if (mi_is_in_main(stat)) if mi_unlikely(mi_is_in_main(stat))
{ {
// add atomically (for abandoned pages) // add atomically (for abandoned pages)
int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
@ -51,6 +51,27 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
} }
} }
// Compensating adjustment of a statistic: shift `current`, `allocated` and
// `freed` by the same signed `amount` without touching the peak. Used, for
// example, right before committing a range to first subtract the parts that
// were already committed, so those are not counted twice.
//   stat:   the statistic to adjust
//   amount: signed delta; a zero delta is a no-op
static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount) {
  if (amount == 0) { return; }  // nothing to compensate
  if mi_unlikely(mi_is_in_main(stat))
  {
    // `mi_is_in_main` holds for this stat: apply each delta with a relaxed atomic add
    mi_atomic_addi64_relaxed(&stat->current, amount);
    mi_atomic_addi64_relaxed(&stat->allocated, amount);
    mi_atomic_addi64_relaxed(&stat->freed, amount);
    return;
  }
  // otherwise plain (non-atomic) updates suffice
  stat->current += amount;     // the peak is deliberately left unchanged
  stat->allocated += amount;   // shift both totals by the same delta
  stat->freed += amount;
}
void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
if (mi_is_in_main(stat)) { if (mi_is_in_main(stat)) {
mi_atomic_addi64_relaxed( &stat->count, 1 ); mi_atomic_addi64_relaxed( &stat->count, 1 );
@ -70,6 +91,14 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
mi_stat_update(stat, -((int64_t)amount)); mi_stat_update(stat, -((int64_t)amount));
} }
// Adjust `stat` upwards by `amount` via `mi_stat_adjust`
// (a compensating correction, as opposed to `_mi_stat_increase`).
void _mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount) {
  const int64_t delta = (int64_t)amount;
  mi_stat_adjust(stat, delta);
}
// Adjust `stat` downwards by `amount` via `mi_stat_adjust`
// (a compensating correction, as opposed to `_mi_stat_decrease`).
void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount) {
  const int64_t delta = (int64_t)amount;
  mi_stat_adjust(stat, -delta);
}
// must be thread safe as it is called from stats_merge // must be thread safe as it is called from stats_merge
static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) {
if (stat==src) return; if (stat==src) return;