diff --git a/azure-pipelines.yml b/azure-pipelines.yml index d853db2f..b44d7740 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -43,7 +43,7 @@ jobs: solution: $(BuildType)/libmimalloc.sln configuration: '$(MSBuildConfiguration)' msbuildArguments: -m - - script: ctest --verbose --timeout 180 -C $(MSBuildConfiguration) + - script: ctest --verbose --timeout 240 -C $(MSBuildConfiguration) workingDirectory: $(BuildType) displayName: CTest #- script: $(BuildType)\$(BuildType)\mimalloc-test-stress @@ -126,7 +126,7 @@ jobs: cmakeArgs: .. $(cmakeExtraArgs) - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: ctest --verbose --timeout 180 + - script: ctest --verbose --timeout 240 workingDirectory: $(BuildType) displayName: CTest env: @@ -157,7 +157,7 @@ jobs: cmakeArgs: .. $(cmakeExtraArgs) - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) displayName: Make - - script: ctest --verbose --timeout 180 + - script: ctest --verbose --timeout 240 workingDirectory: $(BuildType) displayName: CTest # - upload: $(Build.SourcesDirectory)/$(BuildType) @@ -192,7 +192,7 @@ jobs: solution: $(BuildType)/libmimalloc.sln configuration: '$(MSBuildConfiguration)' msbuildArguments: -m - - script: ctest --verbose --timeout 180 -C $(MSBuildConfiguration) + - script: ctest --verbose --timeout 240 -C $(MSBuildConfiguration) workingDirectory: $(BuildType) displayName: CTest @@ -235,7 +235,7 @@ jobs: cmakeArgs: .. $(cmakeExtraArgs) - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: ctest --verbose --timeout 180 + - script: ctest --verbose --timeout 240 workingDirectory: $(BuildType) displayName: CTest @@ -278,7 +278,7 @@ jobs: cmakeArgs: .. $(cmakeExtraArgs) - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: ctest --verbose --timeout 180 + - script: ctest --verbose --timeout 240 workingDirectory: $(BuildType) displayName: CTest @@ -302,7 +302,7 @@ jobs: cmakeArgs: .. 
$(cmakeExtraArgs) - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) displayName: Make - - script: ctest --verbose --timeout 180 + - script: ctest --verbose --timeout 240 workingDirectory: $(BuildType) displayName: CTest diff --git a/bin/minject-arm64.exe b/bin/minject-arm64.exe index 63987afc..97188c4f 100644 Binary files a/bin/minject-arm64.exe and b/bin/minject-arm64.exe differ diff --git a/bin/minject.exe b/bin/minject.exe index dba8f80f..53c42f5d 100644 Binary files a/bin/minject.exe and b/bin/minject.exe differ diff --git a/bin/minject32.exe b/bin/minject32.exe index f837383b..926facfd 100644 Binary files a/bin/minject32.exe and b/bin/minject32.exe differ diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 920a8e2c..f95f481b 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -485,18 +485,27 @@ typedef struct mi_stats_s { } mi_stats_t; +// add to stat keeping track of the peak void _mi_stat_increase(mi_stat_count_t* stat, size_t amount); void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +// adjust stat in special cases to compensate for double counting +void _mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount); +void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount); +// counters can just be increased void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #if (MI_STAT) #define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount) #define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount) #define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount) +#define mi_stat_adjust_increase(stat,amount) _mi_stat_adjust_increase( &(stat), amount) +#define mi_stat_adjust_decrease(stat,amount) _mi_stat_adjust_decrease( &(stat), amount) #else -#define mi_stat_increase(stat,amount) (void)0 -#define mi_stat_decrease(stat,amount) (void)0 -#define mi_stat_counter_increase(stat,amount) (void)0 +#define mi_stat_increase(stat,amount) 
((void)0) +#define mi_stat_decrease(stat,amount) ((void)0) +#define mi_stat_counter_increase(stat,amount) ((void)0) +#define mi_stat_adjust_increase(stat,amount) ((void)0) +#define mi_stat_adjust_decrease(stat,amount) ((void)0) #endif #define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount) diff --git a/src/arena.c b/src/arena.c index 6c048073..122de4d1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -231,12 +231,24 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( // commit requested, but the range may not be committed as a whole: ensure it is committed now if (!mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)) { // not fully committed: commit the full range and set the commit bits - // (this may race and we may double-commit which is fine) + // we set the bits first since we own these slices (they are no longer free) + size_t already_committed_count = 0; + mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed_count); + // adjust the stats so we don't double count the commits + if (already_committed_count > 0) { + _mi_stat_adjust_decrease(&_mi_stats_main.committed, mi_size_of_slices(already_committed_count)); + } + // now actually commit bool commit_zero = false; if (!_mi_os_commit(p, mi_size_of_slices(slice_count), &commit_zero)) { + // failed to commit (todo: give warning?)
+ if (already_committed_count > 0) { + _mi_stat_increase(&_mi_stats_main.committed, mi_size_of_slices(already_committed_count)); + } memid->initially_committed = false; } else { + // committed if (commit_zero) { memid->initially_zero = true; } #if MI_DEBUG > 1 if (memid->initially_zero) { @@ -245,13 +257,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( memid->initially_zero = false; } } - #endif - size_t already_committed_count = 0; - mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed_count); - if (already_committed_count < slice_count) { - // todo: also decrease total - mi_stat_decrease(_mi_stats_main.committed, mi_size_of_slices(already_committed_count)); - } + #endif } } if (memid->initially_zero) { @@ -795,7 +801,7 @@ void _mi_arena_page_free(mi_page_t* page) { Arena abandon ----------------------------------------------------------- */ -static void mi_arena_page_abandon_no_stat(mi_page_t* page) { +void _mi_arena_page_abandon(mi_page_t* page) { mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); mi_assert_internal(_mi_ptr_page(page)==page); mi_assert_internal(mi_page_is_owned(page)); @@ -824,12 +830,8 @@ static void mi_arena_page_abandon_no_stat(mi_page_t* page) { // page is full (or a singleton), page is OS/externally allocated // leave as is; it will be reclaimed when an object is free'd in the page } - _mi_page_unown(page); -} - -void _mi_arena_page_abandon(mi_page_t* page) { - mi_arena_page_abandon_no_stat(page); _mi_stat_increase(&_mi_stats_main.pages_abandoned, 1); + _mi_page_unown(page); } bool _mi_arena_page_try_reabandon_to_mapped(mi_page_t* page) { @@ -846,7 +848,8 @@ bool _mi_arena_page_try_reabandon_to_mapped(mi_page_t* page) { } else { _mi_stat_counter_increase(&_mi_stats_main.pages_reabandon_full, 1); - mi_arena_page_abandon_no_stat(page); + _mi_stat_adjust_decrease(&_mi_stats_main.pages_abandoned, 1); // adjust as we are not abandoning fresh + _mi_arena_page_abandon(page); return true; } } @@ 
-1416,19 +1419,21 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv // reset or decommit in an arena and update the commit bitmap // assumes we own the area (i.e. slices_free is claimed by us) -static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slice_count) { +// returns if the memory is no longer committed (versus reset which keeps the commit) +static bool mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slice_count) { mi_assert_internal(!arena->memid.is_pinned); mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count)); const size_t size = mi_size_of_slices(slice_count); void* const p = mi_arena_slice_start(arena, slice_index); const bool all_committed = mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count); - const bool needs_recommit = _mi_os_purge_ex(p, size, all_committed); + const bool needs_recommit = _mi_os_purge_ex(p, size, all_committed /* allow reset? */); // update committed bitmap if (needs_recommit) { mi_bitmap_clearN(arena->slices_committed, slice_index, slice_count); } + return needs_recommit; } @@ -1445,12 +1450,13 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_ } else { // schedule purge - mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); - if (expire == 0) { - mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay); + mi_msecs_t expire0 = 0; + if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire0, _mi_clock_now() + delay)) { + // expiration was not yet set mi_atomic_storei64_release(&arena->purge_expire_extend, 0); } else if (mi_atomic_loadi64_acquire(&arena->purge_expire_extend) < 10*delay) { // limit max extension time + // already an expiration was set mi_atomic_addi64_acq_rel(&arena->purge_expire_extend, (mi_msecs_t)(delay/10)); // add smallish extra delay } mi_bitmap_setN(arena->slices_purge, slice_index, slice_count, NULL); @@ -1467,8 +1473,8 @@ 
typedef struct mi_purge_visit_info_s { static bool mi_arena_try_purge_range(mi_arena_t* arena, size_t slice_index, size_t slice_count) { if (mi_bbitmap_try_clearN(arena->slices_free, slice_index, slice_count)) { // purge - mi_arena_purge(arena, slice_index, slice_count); - mi_assert_internal(mi_bitmap_is_clearN(arena->slices_committed, slice_index, slice_count)); + bool decommitted = mi_arena_purge(arena, slice_index, slice_count); MI_UNUSED(decommitted); + mi_assert_internal(!decommitted || mi_bitmap_is_clearN(arena->slices_committed, slice_index, slice_count)); // and reset the free range mi_bbitmap_setN(arena->slices_free, slice_index, slice_count); return true; @@ -1495,8 +1501,8 @@ static bool mi_arena_try_purge_visitor(size_t slice_index, size_t slice_count, m vinfo->all_purged = vinfo->all_purged && purged; } } - // done: clear the purge bits - mi_bitmap_clearN(arena->slices_purge, slice_index, slice_count); + // don't clear the purge bits as that is done atomically by the _mi_bitmap_forall_setc_ranges + // mi_bitmap_clearN(arena->slices_purge, slice_index, slice_count); return true; // continue } @@ -1520,17 +1526,11 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) _mi_stat_counter_increase(&_mi_stats_main.arena_purges, 1); // go through all purge info's (with max MI_BFIELD_BITS ranges at a time) + // this also clears those ranges atomically (so any newly freed blocks will get purged next + // time around) mi_purge_visit_info_t vinfo = { now, mi_arena_purge_delay(), true /*all?*/, false /*any?*/}; - _mi_bitmap_forall_set_ranges(arena->slices_purge, &mi_arena_try_purge_visitor, arena, &vinfo); + _mi_bitmap_forall_setc_ranges(arena->slices_purge, &mi_arena_try_purge_visitor, arena, &vinfo); - // if not fully purged, make sure to purge again in the future - if (!vinfo.all_purged) { - const long delay = mi_arena_purge_delay(); - mi_msecs_t expected = 0; - if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expected,
_mi_clock_now() + delay)) { - mi_atomic_storei64_release(&arena->purge_expire_extend, (mi_msecs_t)0); - } - } return vinfo.any_purged; } diff --git a/src/bitmap.c b/src/bitmap.c index 5734b9f5..71e54ba9 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -1239,9 +1239,10 @@ bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_a } // Visit all set bits in a bitmap but try to return ranges (within bfields) if possible. -// used by purging to purge larger ranges if possible +// Also clear those ranges atomically. +// Used by purging to purge larger ranges when possible // todo: optimize further? maybe use avx512 to directly get all indices using a mask_compressstore? -bool _mi_bitmap_forall_set_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg) { +bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg) { // for all chunkmap entries const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS); for (size_t i = 0; i < chunkmap_max; i++) { @@ -1254,7 +1255,7 @@ bool _mi_bitmap_forall_set_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visi mi_bchunk_t* const chunk = &bitmap->chunks[chunk_idx]; for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) { const size_t base_idx = (chunk_idx*MI_BCHUNK_BITS) + (j*MI_BFIELD_BITS); - mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[j]); + mi_bfield_t b = mi_atomic_exchange_acq_rel(&chunk->bfields[j], 0); // can be relaxed? 
#if MI_DEBUG > 1 const size_t bpopcount = mi_popcount(b); size_t rngcount = 0; diff --git a/src/bitmap.h b/src/bitmap.h index 4faaa3a1..ed37e975 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -204,7 +204,7 @@ typedef bool (mi_forall_set_fun_t)(size_t slice_index, size_t slice_count, mi_ar bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg); // Visit all set bits in a bitmap with larger ranges if possible (`slice_count >= 1`) -bool _mi_bitmap_forall_set_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg); +bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg); /* ---------------------------------------------------------------------------- diff --git a/src/stats.c b/src/stats.c index 860a69ef..53937330 100644 --- a/src/stats.c +++ b/src/stats.c @@ -26,7 +26,7 @@ static bool mi_is_in_main(void* stat) { static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { if (amount == 0) return; - if (mi_is_in_main(stat)) + if mi_unlikely(mi_is_in_main(stat)) { // add atomically (for abandoned pages) int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); @@ -51,6 +51,27 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { } } +// Adjust stats to compensate; for example before committing a range, +// first adjust downwards with parts that were already committed so +// we avoid double counting. 
+static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount) { + if (amount == 0) return; + if mi_unlikely(mi_is_in_main(stat)) + { + // adjust atomically + mi_atomic_addi64_relaxed(&stat->current, amount); + mi_atomic_addi64_relaxed(&stat->allocated, amount); + mi_atomic_addi64_relaxed(&stat->freed, amount); + } + else { + // don't affect the peak + stat->current += amount; + // add to both + stat->allocated += amount; + stat->freed += amount; + } +} + void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { if (mi_is_in_main(stat)) { mi_atomic_addi64_relaxed( &stat->count, 1 ); @@ -70,6 +91,14 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { mi_stat_update(stat, -((int64_t)amount)); } +void _mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount) { + mi_stat_adjust(stat, (int64_t)amount); +} + +void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount) { + mi_stat_adjust(stat, -((int64_t)amount)); +} + // must be thread safe as it is called from stats_merge static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { if (stat==src) return;