Add global counters for segments and allocation stats

Sergiy Kuryata 2024-11-26 16:32:54 -08:00
parent 75eec9e61d
commit 0be44b2b0f
11 changed files with 304 additions and 7 deletions

View file

@@ -373,6 +373,16 @@ mi_decl_nodiscard mi_decl_export size_t mi_option_get_size(mi_option_t option);
mi_decl_export void mi_option_set(mi_option_t option, long value);
mi_decl_export void mi_option_set_default(mi_option_t option, long value);
typedef struct mi_allocation_counter_s {
size_t counter;
size_t block_size; // size of the allocation block related to this counter (for example, the block that caused a new segment to be allocated)
} mi_allocation_counter_t;
mi_decl_export bool mi_get_segment_stats(size_t* abandoned, size_t* reclaimed, size_t* reclaim_failed, size_t* allocated, size_t* freed,
mi_allocation_counter_t* allocated_segments, int allocated_segments_count,
mi_allocation_counter_t* free_space_in_segments, int free_space_in_segments_count,
mi_allocation_counter_t* allocated_memory, int allocated_memory_count);
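The implementation of mi_get_segment_stats (see stats.c below) requires each of the three counter arrays to have exactly MI_BIN_HUGE+1 entries, otherwise it returns false. A minimal caller sketch, assuming that count is 74 (MI_BIN_HUGE is an internal constant, so the define below is an assumption that has to match the build):
#include <stdio.h>
#include <mimalloc.h>
// assumed to equal MI_BIN_HUGE+1 in this build; mi_get_segment_stats returns false on a mismatch
#define SEGMENT_STAT_COUNT 74
static void print_segment_stats(void) {
  size_t abandoned = 0, reclaimed = 0, reclaim_failed = 0, allocated = 0, freed = 0;
  mi_allocation_counter_t allocated_segments[SEGMENT_STAT_COUNT];
  mi_allocation_counter_t free_space[SEGMENT_STAT_COUNT];
  mi_allocation_counter_t allocated_memory[SEGMENT_STAT_COUNT];
  if (!mi_get_segment_stats(&abandoned, &reclaimed, &reclaim_failed, &allocated, &freed,
                            allocated_segments, SEGMENT_STAT_COUNT,
                            free_space, SEGMENT_STAT_COUNT,
                            allocated_memory, SEGMENT_STAT_COUNT)) {
    fprintf(stderr, "mi_get_segment_stats failed (array count mismatch?)\n");
    return;
  }
  printf("segments: allocated=%zu freed=%zu abandoned=%zu reclaimed=%zu reclaim_failed=%zu\n",
         allocated, freed, abandoned, reclaimed, reclaim_failed);
  for (int i = 0; i < SEGMENT_STAT_COUNT; i++) {
    if (allocated_segments[i].counter != 0 || allocated_memory[i].counter != 0) {
      printf("  bin %2d (block size %8zu): segments=%zu, abandoned-with-free-space=%zu, live bytes=%zu\n",
             i, allocated_segments[i].block_size, allocated_segments[i].counter,
             free_space[i].counter, allocated_memory[i].counter);
    }
  }
}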
// -------------------------------------------------------------------------------------------------------
// "mi" prefixed implementations of various posix, Unix, Windows, and C++ allocation functions.

View file

@@ -181,6 +181,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool o
bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
void _mi_deferred_free(mi_heap_t* heap, bool force);
mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page);
void _mi_page_free_collect(mi_page_t* page,bool force);
void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments
@@ -1017,5 +1018,11 @@ static inline void _mi_memzero_aligned(void* dst, size_t n) {
}
#endif
void mi_segment_increment_alloc_stats(size_t block_size);
void mi_segment_increment_freed_stats();
void mi_segment_increment_reclaimed_stats();
void mi_segment_increment_reclaim_failed_stats();
void mi_allocation_stats_increment(size_t block_size);
void mi_allocation_stats_decrement(size_t block_size);
#endif

View file

@@ -503,6 +503,7 @@ typedef struct mi_page_queue_s {
mi_page_t* first;
mi_page_t* last;
size_t block_size;
size_t allocationCount;
} mi_page_queue_t;
#define MI_BIN_FULL (MI_BIN_HUGE+1)
@@ -646,6 +647,18 @@ typedef struct mi_stats_s {
#endif
} mi_stats_t;
typedef struct mi_segment_alloc_counter_s {
_Atomic(size_t) counter;
size_t block_size; // size of the allocation block that caused a new segment to be allocated
} mi_segment_alloc_counter_t;
typedef struct mi_segment_stats_s {
_Atomic(size_t) reclaimed_count;
_Atomic(size_t) reclaim_failed_count;
_Atomic(size_t) allocated_count;
_Atomic(size_t) freed_count;
mi_segment_alloc_counter_t alloc_stats[MI_BIN_HUGE+1];
} mi_segment_stats_t;
void _mi_stat_increase(mi_stat_count_t* stat, size_t amount);
void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount);

View file

@@ -39,6 +39,11 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_
// pop from the free list
page->free = mi_block_next(page, block);
page->used++;
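// track the allocation: bump the owning page queue's allocation count and the global per-bin allocated-byte counter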
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
pq->allocationCount++;
mi_allocation_stats_increment(page->block_size);
mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
#if MI_DEBUG>3
if (page->free_is_zero) {

View file

@@ -825,7 +825,14 @@ void _mi_arena_segment_mark_abandoned(mi_segment_t* segment)
// start a cursor at a randomized arena
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current) {
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena));
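// heap may be NULL here (the segment free-space stats in stats.c initialize a cursor without a heap); fall back to a fixed start instead of a randomized one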
if (heap != NULL) {
current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena));
}
else {
current->start = 0;
}
current->count = 0;
current->bitmap_idx = 0;
current->free_space_mask = MI_FREE_SPACE_MASK_ANY;
@@ -896,6 +903,56 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
return NULL;
}
// NOTE: This function has a RACE CONDITION. It accesses abandoned segments WITHOUT clearing the abandoned bit.
// This can result in touching a segment object that has already been freed and cause a crash.
// This function is strictly for experimental purposes: it calculates free space in segments quickly
// without performing numerous interlocked operations while traversing ALL abandoned segments.
// It should be deleted after the experiment is done.
size_t _mi_arena_segment_abandoned_free_space_stats_next(mi_arena_field_cursor_t* previous)
{
const int max_arena = (int)mi_atomic_load_relaxed(&mi_arena_count);
if (max_arena <= 0 || mi_atomic_load_relaxed(&abandoned_count) == 0) return MI_FREE_SPACE_MASK_ALL;
int count = previous->count;
size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx);
size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx) + 1;
// visit arena's (from previous)
for (; count < max_arena; count++, field_idx = 0, bit_idx = 0) {
mi_arena_id_t arena_idx = previous->start + count;
if (arena_idx >= max_arena) { arena_idx = arena_idx % max_arena; } // wrap around
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
if (arena != NULL) {
// visit the abandoned fields (starting at previous_idx)
for ( ; field_idx < arena->field_count; field_idx++, bit_idx = 0) {
size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]);
if mi_unlikely(field != 0) { // skip zero fields quickly
// visit each set bit in the field (todo: maybe use `ctz` here?)
for ( ; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) {
// pre-check if the bit is set
size_t mask = ((size_t)1 << bit_idx);
if mi_unlikely((field & mask) == mask) {
mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx);
// *** THIS CAN CAUSE A CRASH: the segment can be freed while we access its fields.
mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
size_t free_space_mask = mi_atomic_load_relaxed(&segment->free_space_mask) & MI_FREE_SPACE_MASK_ANY;
previous->bitmap_idx = bitmap_idx;
previous->count = count;
return free_space_mask;
}
}
}
}
}
}
// no more abandoned segments found; MI_FREE_SPACE_MASK_ALL signals the end of the iteration to the caller
previous->bitmap_idx = 0;
previous->count = 0;
return MI_FREE_SPACE_MASK_ALL;
}
/* -----------------------------------------------------------
Add an arena.

View file

@@ -34,7 +34,10 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
// checks
if mi_unlikely(mi_check_is_double_free(page, block)) return;
mi_check_padding(page, block);
if (track_stats) { mi_stat_free(page, block); }
if (track_stats) {
mi_stat_free(page, block);
mi_allocation_stats_decrement(page->block_size);
}
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
if (!mi_page_is_huge(page)) { // huge page content may be already decommitted
memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
@@ -261,6 +264,7 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* seg
// adjust stats (after padding check and potentially recursive `mi_free` above)
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page,block));
mi_allocation_stats_decrement(page->block_size);
// for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
_mi_padding_shrink(page, block, sizeof(mi_block_t));

View file

@@ -665,7 +665,6 @@ static mi_segment_t* mi_heap_get_segment_to_drop(mi_heap_t* heap) {
}
const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment);
mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page);
// Visit all pages in a segment
static mi_decl_noinline void mi_segment_visit_pages(mi_heap_t* heap, mi_segment_t* segment, heap_page_visitor_fun* fn, void* arg1)
@@ -744,7 +743,7 @@ void mi_heap_drop_segment_if_required(mi_heap_t* heap, size_t alloc_block_size)
{
size_t targetSegmentCount = mi_option_get_size(mi_option_max_segments_per_heap);
if ((targetSegmentCount > 0) &&
(alloc_block_size <= MI_MEDIUM_OBJ_SIZE_MAX) &&
(alloc_block_size <= MI_LARGE_OBJ_SIZE_MAX) &&
(heap->tld->segments.count >= targetSegmentCount)) {
mi_heap_drop_segment(heap, targetSegmentCount);

View file

@@ -413,6 +413,9 @@ size_t _mi_current_thread_count(void) {
return mi_atomic_load_relaxed(&thread_count);
}
size_t _mi_get_next_thread_partition_id();
extern mi_decl_thread size_t _mi_current_thread_partitionId;
// This is called from the `mi_malloc_generic`
void mi_thread_init(void) mi_attr_noexcept
{
@@ -424,6 +427,7 @@ void mi_thread_init(void) mi_attr_noexcept
// fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called)
if (_mi_thread_heap_init()) return; // returns true if already initialized
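// assign this thread to one of the counter partitions used by the partitioned allocation counters (round-robin, see stats.c)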
_mi_current_thread_partitionId = _mi_get_next_thread_partition_id();
_mi_stat_increase(&_mi_stats_main.threads, 1);
mi_atomic_increment_relaxed(&thread_count);
//_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());

View file

@@ -94,7 +94,7 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's)
{ 400, UNINIT, MI_OPTION(retry_on_oom) }, // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries.
{ 8, UNINIT, MI_OPTION(max_segments_per_heap) }, // max number of segments that heap can own.
{ 2000, UNINIT, MI_OPTION(heap_collect_abandoned_interval) }, // max number of segments that heap can own.
{ 2000, UNINIT, MI_OPTION(heap_collect_abandoned_interval) }, // delay (ms) between collections of abandoned segments when a heap drops excessive segments.
};
static void mi_option_init(mi_option_desc_t* desc);
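Both options can also be set programmatically through the option API declared in mimalloc.h; a minimal hedged sketch (the values are arbitrary examples, not recommendations):
#include <mimalloc.h>
int main(void) {
  // cap each heap at 16 owned segments (0 disables the cap; the table default above is 8)
  mi_option_set(mi_option_max_segments_per_heap, 16);
  // wait at least 5 seconds between collections of abandoned segments triggered by dropping excess segments
  mi_option_set(mi_option_heap_collect_abandoned_interval, 5000);
  return 0;
}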

View file

@@ -981,6 +981,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
// return it to the OS
mi_segment_os_free(segment, tld);
mi_segment_increment_freed_stats();
}
@@ -1367,7 +1368,8 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
// the segment due to concurrent frees (in which case `NULL` is returned).
mi_segment_t* segmentToReturn = mi_segment_reclaim(segment, heap, block_size, reclaimed, tld);
if (segmentToReturn != NULL) {
return segmentToReturn;
mi_segment_increment_reclaimed_stats();
return segmentToReturn;
}
}
else if (segment->abandoned_visits > 3 && is_suitable && mi_option_get_size(mi_option_max_segments_per_heap) == 0) {
@@ -1380,6 +1382,8 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
_mi_arena_segment_mark_abandoned(segment);
}
}
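// nothing was reclaimed for this request; count it as a failed reclaim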
mi_segment_increment_reclaim_failed_stats();
return NULL;
}
@@ -1431,7 +1435,12 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_
return segment;
}
// 2. otherwise allocate a fresh segment
return mi_segment_alloc(0, 0, heap->arena_id, tld, os_tld, NULL);
segment = mi_segment_alloc(0, 0, heap->arena_id, tld, os_tld, NULL);
if (segment != NULL) {
mi_segment_increment_alloc_stats(block_size);
}
return segment;
}
@@ -1482,6 +1491,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment,
segment->thread_id = 0; // huge segments are immediately abandoned
#endif
mi_segment_increment_alloc_stats(size);
// for huge pages we initialize the block_size as we may
// overallocate to accommodate large alignments.
size_t psize;

View file

@@ -15,6 +15,192 @@ terms of the MIT license. A copy of the license can be found in the file
#pragma warning(disable:4204) // non-constant aggregate initializer
#endif
// --------------------------------------------------------
// Segment statistics
// --------------------------------------------------------
mi_segment_stats_t _mi_global_segment_stats;
void mi_init_segment_stats()
{
_mi_global_segment_stats.reclaimed_count = 0;
_mi_global_segment_stats.reclaim_failed_count = 0;
_mi_global_segment_stats.allocated_count = 0;
_mi_global_segment_stats.freed_count = 0;
static_assert((MI_BIN_HUGE + 1) == sizeof(_mi_global_segment_stats.alloc_stats) / sizeof(_mi_global_segment_stats.alloc_stats[0]), "alloc_stats must have one entry per size bin");
for (int i = 0; i <= MI_BIN_HUGE; i++)
{
size_t block_size = _mi_bin_size((uint8_t)i);
_mi_global_segment_stats.alloc_stats[i].counter = 0;
_mi_global_segment_stats.alloc_stats[i].block_size = block_size;
}
// (MI_FREE_SPACE_MASK_BIT_COUNT-1) combines multiple block sizes. Set it to INT32_MAX to distinguish it from the rest.
_mi_global_segment_stats.alloc_stats[MI_FREE_SPACE_MASK_BIT_COUNT - 1].block_size = INT32_MAX;
}
void mi_segment_increment_alloc_stats(size_t block_size)
{
uint8_t page_queue_index = _mi_bin(block_size);
mi_atomic_increment_relaxed(&_mi_global_segment_stats.alloc_stats[page_queue_index].counter);
mi_atomic_increment_relaxed(&_mi_global_segment_stats.allocated_count);
}
void mi_segment_increment_freed_stats()
{
mi_atomic_increment_relaxed(&_mi_global_segment_stats.freed_count);
}
void mi_segment_increment_reclaimed_stats()
{
mi_atomic_increment_relaxed(&_mi_global_segment_stats.reclaimed_count);
}
void mi_segment_increment_reclaim_failed_stats()
{
mi_atomic_increment_relaxed(&_mi_global_segment_stats.reclaim_failed_count);
}
// --------------------------------------------------------
// Partitioned counter to avoid contention in interlocked operations
// --------------------------------------------------------
_Atomic(size_t) _mi_next_counter_partition_id;
#define NUMBER_OF_PARTITIONS 32
size_t _mi_get_next_thread_partition_id()
{
return mi_atomic_increment_relaxed(&_mi_next_counter_partition_id) % NUMBER_OF_PARTITIONS;
}
mi_decl_thread size_t _mi_current_thread_partitionId = 0;
// Implements a counter that has its value partitioned in a set of bucket (in separate cache lines)
// to reduce contention when the value of the counter is updated.
typedef struct mi_decl_cache_align mi_partitioned_counter_value_s
{
_Atomic(int64_t) counter_value;
} mi_partitioned_counter_value_t;
typedef struct mi_partitioned_counter_s
{
mi_partitioned_counter_value_t counter_partitions[NUMBER_OF_PARTITIONS];
} mi_partitioned_counter_t;
void mi_partitioned_counter_increment(mi_partitioned_counter_t* counter, size_t value)
{
mi_atomic_add_relaxed(&counter->counter_partitions[_mi_current_thread_partitionId].counter_value, value);
}
void mi_partitioned_counter_decrement(mi_partitioned_counter_t* counter, size_t value)
{
mi_atomic_sub_relaxed(&counter->counter_partitions[_mi_current_thread_partitionId].counter_value, value);
}
int64_t mi_partitioned_counter_get_value(mi_partitioned_counter_t* counter)
{
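// sum all partitions; an individual partition can go negative when a block is freed on a different thread (partition) than the one that allocated it, so the possibly transiently negative total is clamped to zero below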
int64_t total = 0;
for (int i = 0; i < NUMBER_OF_PARTITIONS; i++)
{
total += mi_atomic_load_relaxed(&counter->counter_partitions[i].counter_value);
}
int64_t retVal = ((int64_t)total);
if (retVal < 0)
{
retVal = 0;
}
return retVal;
}
mi_partitioned_counter_t _mi_allocated_memory[MI_BIN_HUGE+1];
void mi_allocation_stats_increment(size_t block_size)
{
uint8_t binIndex = _mi_bin(block_size);
mi_partitioned_counter_increment(&_mi_allocated_memory[binIndex], block_size);
}
void mi_allocation_stats_decrement(size_t block_size)
{
uint8_t binIndex = _mi_bin(block_size);
mi_partitioned_counter_decrement(&_mi_allocated_memory[binIndex], block_size);
}
size_t _mi_arena_segment_abandoned_free_space_stats_next(mi_arena_field_cursor_t* previous);
void mi_segment_update_free_space_stats(mi_allocation_counter_t* free_space_in_segments)
{
mi_arena_field_cursor_t current;
size_t free_space_mask = 0;
_mi_arena_field_cursor_init(NULL, &current);
while ((free_space_mask = _mi_arena_segment_abandoned_free_space_stats_next(&current)) != MI_FREE_SPACE_MASK_ALL) {
int bit_index = 0;
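// decode the mask: bit i set means this abandoned segment has free space available for size bin i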
while (free_space_mask != 0) {
if ((free_space_mask & 1) != 0) {
free_space_in_segments[bit_index].counter++;
}
free_space_mask = free_space_mask >> 1;
bit_index++;
}
}
}
void mi_update_allocated_memory_stats(mi_allocation_counter_t* allocated_memory, int allocated_memory_count)
{
for (int i = 0; i < allocated_memory_count; i++) {
allocated_memory[i].counter = mi_partitioned_counter_get_value(&_mi_allocated_memory[i]);
}
}
bool mi_get_segment_stats(size_t* abandoned, size_t* reclaimed, size_t* reclaim_failed, size_t* allocated, size_t* freed,
mi_allocation_counter_t* allocated_segments, int allocated_segments_count,
mi_allocation_counter_t* free_space_in_segments, int free_space_in_segments_count,
mi_allocation_counter_t* allocated_memory, int allocated_memory_count)
{
int stat_count = sizeof(_mi_global_segment_stats.alloc_stats) / sizeof(_mi_global_segment_stats.alloc_stats[0]);
if ((allocated_segments == NULL) || (allocated_segments_count != stat_count)) {
return false;
}
if ((free_space_in_segments == NULL) || (free_space_in_segments_count != stat_count)) {
return false;
}
if ((allocated_memory == NULL) || (allocated_memory_count != stat_count)) {
return false;
}
*abandoned = _mi_arena_segment_abandoned_count();
*reclaimed = mi_atomic_load_relaxed(&_mi_global_segment_stats.reclaimed_count);
*reclaim_failed = mi_atomic_load_relaxed(&_mi_global_segment_stats.reclaim_failed_count);
*allocated = mi_atomic_load_relaxed(&_mi_global_segment_stats.allocated_count);
*freed = mi_atomic_load_relaxed(&_mi_global_segment_stats.freed_count);
for (int i = 0; i < stat_count; i++) {
allocated_segments[i].counter = mi_atomic_load_relaxed(&_mi_global_segment_stats.alloc_stats[i].counter);
allocated_segments[i].block_size = _mi_global_segment_stats.alloc_stats[i].block_size;
free_space_in_segments[i].counter = 0;
free_space_in_segments[i].block_size = allocated_segments[i].block_size;
allocated_memory[i].counter = 0;
allocated_memory[i].block_size = allocated_segments[i].block_size;
}
mi_segment_update_free_space_stats(free_space_in_segments);
mi_update_allocated_memory_stats(allocated_memory, allocated_memory_count);
return true;
}
/* -----------------------------------------------------------
Statistics operations
----------------------------------------------------------- */
@@ -388,6 +574,8 @@ void mi_stats_reset(void) mi_attr_noexcept {
if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); }
memset(&_mi_stats_main, 0, sizeof(mi_stats_t));
if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); };
mi_init_segment_stats();
}
void mi_stats_merge(void) mi_attr_noexcept {