initial delay slots

This commit is contained in:
daan 2019-11-04 11:48:41 -08:00
parent 3d0a1e249f
commit 829fd872f4
9 changed files with 171 additions and 50 deletions

View file

@ -61,15 +61,15 @@ int _mi_os_numa_node_count(void);
// memory.c
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld);
void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats);
void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld);
bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats);
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld);
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld);
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld);
bool _mi_mem_protect(void* addr, size_t size);
bool _mi_mem_unprotect(void* addr, size_t size);
void _mi_mem_collect(mi_stats_t* stats);
void _mi_mem_collect(mi_os_tld_t* tld);
// "segment.c"
mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
@ -107,7 +107,6 @@ uintptr_t _mi_heap_random(mi_heap_t* heap);
// "stats.c"
void _mi_stats_done(mi_stats_t* stats);
typedef int64_t mi_msecs_t;
mi_msecs_t _mi_clock_now(void);
mi_msecs_t _mi_clock_end(mi_msecs_t start);
mi_msecs_t _mi_clock_start(void);

View file

@ -385,6 +385,19 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
// ------------------------------------------------------
// Delay slots (to avoid expensive OS calls)
// ------------------------------------------------------
// Time value in milli-seconds; used both for delays and for absolute expiration times.
typedef int64_t mi_msecs_t;
// A single delay slot: a memory range whose (expensive) OS operation is
// postponed until `expire`, so it can be cancelled cheaply if the range
// is reused before then.
typedef struct mi_delay_slot_s {
mi_msecs_t expire;  // absolute expiration time (clock `now` + delay); 0 marks an empty slot
uint8_t* addr;  // start of the delayed range (may hold a stale value when the slot is empty)
size_t size;  // size of the delayed range in bytes
} mi_delay_slot_t;
// Number of slots in a delay slot array (see the `reset_delay` field of `mi_os_tld_t`).
#define MI_RESET_DELAY_SLOTS (128)
// ------------------------------------------------------
// Thread Local data
// ------------------------------------------------------
@ -395,6 +408,12 @@ typedef struct mi_segment_queue_s {
mi_segment_t* last;
} mi_segment_queue_t;
// OS thread local data
// Holds the per-thread state that is passed to the `_mi_mem_xxx` functions.
// Note: `tld_main` initializes this struct positionally, so the field order matters.
typedef struct mi_os_tld_s {
size_t region_idx; // start point for next allocation
mi_stats_t* stats; // points to tld stats
mi_delay_slot_t reset_delay[MI_RESET_DELAY_SLOTS]; // pending delayed resets; a slot with `expire == 0` is empty
} mi_os_tld_t;
// Segments thread local data
typedef struct mi_segments_tld_s {
@ -408,14 +427,9 @@ typedef struct mi_segments_tld_s {
size_t cache_size; // total size of all segments in the cache
mi_segment_t* cache; // (small) cache of segments
mi_stats_t* stats; // points to tld stats
mi_os_tld_t* os; // points to os stats
} mi_segments_tld_t;
// OS thread local data
typedef struct mi_os_tld_s {
size_t region_idx; // start point for next allocation
mi_stats_t* stats; // points to tld stats
} mi_os_tld_t;
// Thread local data
struct mi_tld_s {
unsigned long long heartbeat; // monotonic heartbeat count

View file

@ -275,6 +275,7 @@ typedef enum mi_option_e {
mi_option_reset_decommits,
mi_option_eager_commit_delay,
mi_option_segment_reset,
mi_option_reset_delay,
mi_option_os_tag,
mi_option_max_numa_node,
mi_option_max_errors,

View file

@ -149,7 +149,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
// collect regions
if (collect >= FORCE && _mi_is_main_thread()) {
_mi_mem_collect(&heap->tld->stats);
_mi_mem_collect(&heap->tld->os);
}
}

View file

@ -94,11 +94,12 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
#define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats)))
#define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os)))
static mi_tld_t tld_main = {
0, false,
&_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments
{ 0, tld_main_stats }, // os
{ MI_STATS_NULL } // stats
};
@ -218,6 +219,7 @@ static bool _mi_heap_init(void) {
memset(tld, 0, sizeof(*tld));
tld->heap_backing = heap;
tld->segments.stats = &tld->stats;
tld->segments.os = &tld->os;
tld->os.stats = &tld->stats;
_mi_heap_default = heap;
}

View file

@ -53,6 +53,9 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
// local
static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size);
// Constants
#if (MI_INTPTR_SIZE==8)
@ -470,16 +473,19 @@ Free
-----------------------------------------------------------------------------*/
// Free previously allocated memory with a given id.
void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
mi_assert_internal(size > 0 && stats != NULL);
void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) {
mi_assert_internal(size > 0 && tld != NULL);
if (p==NULL) return;
if (size==0) return;
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size);
size_t arena_memid = 0;
size_t idx = 0;
size_t bitidx = 0;
if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) {
// was a direct arena allocation, pass through
_mi_arena_free(p, size, arena_memid, stats);
_mi_arena_free(p, size, arena_memid, tld->stats);
}
else {
// allocated in a region
@ -512,14 +518,14 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
(mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead
mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments
{
_mi_os_reset(p, size, stats);
_mi_os_reset(p, size, tld->stats); // cannot use delay reset! (due to concurrent allocation in the same region)
//_mi_os_decommit(p, size, stats); // todo: and clear dirty bits?
}
}
}
if (!is_eager_committed) {
// adjust commit statistics as we commit again when re-using the same slot
_mi_stat_decrease(&stats->committed, mi_good_commit_size(size));
_mi_stat_decrease(&tld->stats->committed, mi_good_commit_size(size));
}
// TODO: should we free empty regions? currently only done _mi_mem_collect.
@ -539,7 +545,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
/* ----------------------------------------------------------------------------
collection
-----------------------------------------------------------------------------*/
void _mi_mem_collect(mi_stats_t* stats) {
void _mi_mem_collect(mi_os_tld_t* tld) {
// free every region that has no segments in use.
for (size_t i = 0; i < regions_count; i++) {
mem_region_t* region = &regions[i];
@ -554,7 +560,8 @@ void _mi_mem_collect(mi_stats_t* stats) {
bool is_eager_committed;
void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, &is_eager_committed);
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
_mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, stats);
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE);
_mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats);
}
// and release
mi_atomic_write(&region->info,0);
@ -564,25 +571,123 @@ void _mi_mem_collect(mi_stats_t* stats) {
}
}
/* ----------------------------------------------------------------------------
Delay slots
-----------------------------------------------------------------------------*/
// Callback that performs the delayed operation on `[addr, addr+size)`;
// `arg` is passed through unchanged from `mi_delay_insert`.
typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg);

// Register the range `[addr, addr+size)` so that `resolve` is called on it
// once `delay` milli-seconds have passed.
//
//   slots, count : the delay slot array and its length
//   delay        : delay in milli-seconds; if `<= 0` the range is resolved immediately
//   addr, size   : the range to resolve later
//   resolve, arg : the callback and its pass-through argument
//
// While walking the slots, every expired slot is resolved. If the new range is
// not covered by an existing slot it is stored in an empty slot, or else in the
// slot that expires first (which is resolved right away to make room).
static void mi_delay_insert(mi_delay_slot_t* slots, size_t count,
                            mi_msecs_t delay, uint8_t* addr, size_t size,
                            mi_delay_resolve_fun* resolve, void* arg)
{
  // No (or an invalid, negative) delay, or no slots at all: do it right now.
  // A negative delay must not fall through, or the range would be neither
  // registered nor resolved.
  if (delay <= 0 || count == 0) {
    resolve(addr, size, arg);
    return;
  }

  const mi_msecs_t now = _mi_clock_now();
  const mi_msecs_t expire = now + delay;
  mi_delay_slot_t* oldest = slots;  // slot to (re)use if the range is not yet covered
  bool registered = false;          // is the new range covered by some slot already?

  // walk through all slots, resolving expired ones.
  // remember the oldest slot to insert the new entry in.
  for (size_t i = 0; i < count; i++) {
    mi_delay_slot_t* slot = &slots[i];

    if (slot->expire == 0) {
      // empty slot (its `addr`/`size` may be stale and must be ignored)
      oldest = slot;
    }
    // TODO: should we handle overlapping areas too?
    else if (slot->addr <= addr && slot->addr + slot->size >= addr + size) {
      // earlier slot encompasses new area, increase expiration
      slot->expire = expire;
      registered = true;
    }
    else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) {
      if (!registered) {
        // new one encompasses old slot, overwrite
        slot->expire = expire;
        slot->addr = addr;
        slot->size = size;
        registered = true;
      }
      else {
        // the new area is already registered in another slot that fully covers
        // this one; release this slot instead of storing a duplicate entry.
        slot->expire = 0;
        oldest = slot;
      }
    }
    else if (slot->expire < now) {
      // expired slot, resolve now
      slot->expire = 0;
      resolve(slot->addr, slot->size, arg);
      oldest = slot;  // it is empty now, prefer it over evicting a live slot
    }
    else if (oldest->expire > slot->expire) {
      oldest = slot;
    }
  }

  if (!registered) {
    // not yet registered, use the oldest slot
    if (oldest->expire > 0) {
      resolve(oldest->addr, oldest->size, arg);  // evict if not empty
    }
    oldest->expire = expire;
    oldest->addr = addr;
    oldest->size = size;
  }
}
// Cancel any pending delayed operation that touches `[p, p+size)`.
//
// Returns `true` only if a pending (non-empty) slot fully covered the range,
// i.e. the delayed operation has definitely not been performed on any part of
// it yet. Slots that lie inside the range, or partially overlap it, are removed
// as well but do not make the result `true`.
static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size)
{
  uint8_t* addr = (uint8_t*)p;
  bool done = false;
  // walk through all slots
  for (size_t i = 0; i < count; i++) {
    mi_delay_slot_t* slot = &slots[i];
    if (slot->expire == 0) {
      // empty slot: its `addr`/`size` are stale leftovers of an entry that was
      // already resolved or removed. Matching on those would wrongly report the
      // range as still pending.
      continue;
    }
    if (slot->addr <= addr && slot->addr + slot->size >= addr + size) {
      // earlier slot encompasses the area; remove it
      slot->expire = 0;
      done = true;
    }
    else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) {
      // new one encompasses old slot, remove it
      slot->expire = 0;
    }
    else if ((addr <= slot->addr && addr + size > slot->addr) ||
             (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) {
      // partial overlap, remove slot
      mi_assert_internal(false);
      slot->expire = 0;
    }
  }
  return done;
}
// Resolve callback for the reset delay slots: performs the actual OS reset of
// `[p, p+size)`. `vtld` is the `mi_os_tld_t*` that was passed as the `arg` of
// `mi_delay_insert`; its statistics are handed on to the OS layer.
static void mi_resolve_reset(void* p, size_t size, void* vtld) {
  _mi_os_reset(p, size, ((mi_os_tld_t*)vtld)->stats);
}
// Schedule a reset of `[p, p+size)` in the reset delay slots of `tld`, using
// the `mi_option_reset_delay` option as the delay; the OS reset itself is done
// by `mi_resolve_reset` whenever the delay slots resolve the entry.
// Always returns `true`.
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
  uint8_t* const start = (uint8_t*)p;
  mi_delay_insert(tld->reset_delay, MI_RESET_DELAY_SLOTS,
                  mi_option_get(mi_option_reset_delay),
                  start, size, &mi_resolve_reset, tld);
  return true;
}
// Undo a reset of `[p, p+size)`. First try to cancel it in the reset delay
// slots; when `mi_delay_remove` reports success no OS call is made, `true` is
// returned and `*is_zero` is left untouched. Otherwise the result of
// `_mi_os_unreset` is returned.
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
  const bool cancelled = mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, (uint8_t*)p, size);
  if (cancelled) {
    return true;
  }
  return _mi_os_unreset(p, size, is_zero, tld->stats);
}
/* ----------------------------------------------------------------------------
Other
-----------------------------------------------------------------------------*/
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) {
return _mi_os_commit(p, size, is_zero, stats);
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size);
return _mi_os_commit(p, size, is_zero, tld->stats);
}
bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) {
return _mi_os_decommit(p, size, stats);
}
bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) {
return _mi_os_reset(p, size, stats);
}
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) {
return _mi_os_unreset(p, size, is_zero, stats);
bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size);
return _mi_os_decommit(p, size, tld->stats);
}
bool _mi_mem_protect(void* p, size_t size) {

View file

@ -70,6 +70,7 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
{ 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit)
{ 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
{ 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node
{ 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output

View file

@ -234,7 +234,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
mi_assert_internal(!segment->mem_is_fixed);
_mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set
}
_mi_mem_free(segment, segment_size, segment->memid, tld->stats);
_mi_mem_free(segment, segment_size, segment->memid, tld->os);
}
@ -281,7 +281,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld)
}
mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE);
if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) {
_mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats);
_mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->os);
}
segment->next = tld->cache;
tld->cache = segment;
@ -346,13 +346,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
}
if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) {
mi_assert_internal(!segment->mem_is_fixed);
_mi_mem_commit(segment, segment->segment_size, &is_zero, tld->stats);
_mi_mem_commit(segment, segment->segment_size, &is_zero, tld->os);
segment->mem_is_committed = true;
}
if (!segment->mem_is_fixed &&
(mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) {
bool reset_zero = false;
_mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->stats);
_mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->os);
if (reset_zero) is_zero = true;
}
}
@ -365,7 +365,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
if (!commit) {
// ensure the initial info is committed
bool commit_zero = false;
_mi_mem_commit(segment, info_size, &commit_zero, tld->stats);
_mi_mem_commit(segment, info_size, &commit_zero, tld->os);
if (commit_zero) is_zero = true;
}
segment->memid = memid;
@ -459,7 +459,7 @@ static bool mi_segment_has_free(const mi_segment_t* segment) {
return (segment->used < segment->capacity);
}
static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) {
static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(mi_segment_has_free(segment));
mi_assert_expensive(mi_segment_is_valid(segment));
for (size_t i = 0; i < segment->capacity; i++) {
@ -472,14 +472,14 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats)
mi_assert_internal(!segment->mem_is_fixed);
page->is_committed = true;
bool is_zero = false;
_mi_mem_commit(start,psize,&is_zero,stats);
_mi_mem_commit(start,psize,&is_zero,tld->os);
if (is_zero) page->is_zero_init = true;
}
if (page->is_reset) {
mi_assert_internal(!segment->mem_is_fixed);
page->is_reset = false;
bool is_zero = false;
_mi_mem_unreset(start, psize, &is_zero, stats);
_mi_mem_unreset(start, psize, &is_zero, tld->os);
if (is_zero) page->is_zero_init = true;
}
}
@ -497,21 +497,20 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats)
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld);
static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_stats_t* stats) {
UNUSED(stats);
static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert_internal(page->segment_in_use);
mi_assert_internal(mi_page_all_free(page));
mi_assert_internal(page->is_committed);
size_t inuse = page->capacity * page->block_size;
_mi_stat_decrease(&stats->page_committed, inuse);
_mi_stat_decrease(&stats->pages, 1);
_mi_stat_decrease(&tld->stats->page_committed, inuse);
_mi_stat_decrease(&tld->stats->pages, 1);
// reset the page memory to reduce memory pressure?
if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) {
size_t psize;
uint8_t* start = _mi_page_start(segment, page, &psize);
page->is_reset = true;
_mi_mem_reset(start, psize, stats);
_mi_mem_reset(start, psize, tld->os);
}
// zero the page data, but not the segment fields
@ -529,7 +528,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
mi_assert_expensive(mi_segment_is_valid(segment));
// mark it as free now
mi_segment_page_clear(segment, page, tld->stats);
mi_segment_page_clear(segment, page, tld);
if (segment->used == 0) {
// no more used pages; remove from the free list and free the segment
@ -634,7 +633,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
_mi_stat_decrease(&tld->stats->pages_abandoned, 1);
if (mi_page_all_free(page)) {
// if everything free by now, free the page
mi_segment_page_clear(segment,page,tld->stats);
mi_segment_page_clear(segment,page,tld);
}
else {
// otherwise reclaim it
@ -666,7 +665,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
// Requires that the page has free pages
static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(mi_segment_has_free(segment));
mi_page_t* page = mi_segment_find_free(segment, tld->stats);
mi_page_t* page = mi_segment_find_free(segment, tld);
page->segment_in_use = true;
segment->used++;
mi_assert_internal(segment->used <= segment->capacity);

View file

@ -206,7 +206,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char*
const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count));
const long avg_whole = (long)(avg_tens/10);
const long avg_frac1 = (long)(avg_tens%10);
_mi_fprintf(out, "%10s: %5ld.%ld avg %ld %ld\n", msg, avg_whole, avg_frac1);
_mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1);
}