fix page info size and order; atomic page flags

daanx 2024-12-06 22:37:59 -08:00
parent 5a5943ad33
commit 659a9dd51d
10 changed files with 87 additions and 89 deletions

View file

@@ -360,7 +360,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM
list(APPEND mi_cflags_dynamic -ftls-model=initial-exec)
message(STATUS "Use local dynamic TLS for the static build (since MI_LIBC_MUSL=ON)")
else()
list(APPEND mi_cflags -ftls-model=initial-exec -march=haswell -mavx2)
list(APPEND mi_cflags -ftls-model=initial-exec -march=haswell -mavx2 -O2)
endif()
endif()
if(MI_OVERRIDE)

View file

@@ -80,10 +80,12 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_and_relaxed(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_or_relaxed(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1)
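
The and/or variants make it possible to set and clear individual flag bits atomically, which the new atomic page flags below rely on. As a minimal standalone sketch in plain C11 (illustrative only, not part of this diff), the pattern these macros wrap looks like this:

// Sketch: set and clear flag bits with C11 atomics, the pattern behind
// mi_atomic_or_acq_rel / mi_atomic_and_acq_rel. Flag values are hypothetical.
#include <stdatomic.h>
#include <stdio.h>

#define IN_FULL_QUEUE ((size_t)0x01)
#define HAS_ALIGNED   ((size_t)0x02)

int main(void) {
  _Atomic(size_t) xflags = 0;
  atomic_fetch_or_explicit(&xflags, IN_FULL_QUEUE, memory_order_acq_rel);   // set a bit
  atomic_fetch_and_explicit(&xflags, ~HAS_ALIGNED, memory_order_acq_rel);   // clear a bit
  printf("flags: 0x%zx\n", atomic_load_explicit(&xflags, memory_order_acquire));
  return 0;
}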

View file

@@ -667,7 +667,8 @@ static inline void mi_page_clear_abandoned_mapped(mi_page_t* page) {
static inline bool mi_page_is_huge(const mi_page_t* page) {
return (page->block_size > MI_LARGE_MAX_OBJ_SIZE || (mi_memkind_is_os(page->memid.memkind) && page->memid.mem.os.alignment > MI_PAGE_MAX_OVERALLOC_ALIGN));
return (page->block_size > MI_LARGE_MAX_OBJ_SIZE ||
(mi_memkind_is_os(page->memid.memkind) && page->memid.mem.os.base < (void*)page));
}
@@ -727,20 +728,33 @@ static inline bool _mi_page_unown(mi_page_t* page) {
//-----------------------------------------------------------
// Page flags
//-----------------------------------------------------------
static inline mi_page_flags_t mi_page_flags(const mi_page_t* page) {
return mi_atomic_load_acquire(&page->xflags);
}
static inline void mi_page_flags_set(mi_page_t* page, bool set, mi_page_flags_t newflag) {
if (set) {
mi_atomic_or_acq_rel(&page->xflags, newflag);
}
else {
mi_atomic_and_acq_rel(&page->xflags, ~newflag);
}
}
static inline bool mi_page_is_in_full(const mi_page_t* page) {
return page->flags.x.in_full;
return ((mi_page_flags(page) & MI_PAGE_IN_FULL_QUEUE) != 0);
}
static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
page->flags.x.in_full = in_full;
mi_page_flags_set(page, in_full, MI_PAGE_IN_FULL_QUEUE);
}
static inline bool mi_page_has_aligned(const mi_page_t* page) {
return page->flags.x.has_aligned;
return ((mi_page_flags(page) & MI_PAGE_HAS_ALIGNED) != 0);
}
static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
page->flags.x.has_aligned = has_aligned;
mi_page_flags_set(page, has_aligned, MI_PAGE_HAS_ALIGNED);
}
/* -------------------------------------------------------------------

View file

@@ -167,8 +167,8 @@ static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
typedef struct mi_memid_os_info {
void* base; // actual base address of the block (used for offset aligned allocations)
size_t alignment; // alignment at allocation
size_t size; // allocated full size
// size_t alignment; // alignment at allocation
} mi_memid_os_info_t;
typedef struct mi_memid_arena_info {
@@ -224,26 +224,11 @@ typedef enum mi_owned_e {
} mi_owned_t;
// The `in_full` and `has_aligned` page flags are put in a union to efficiently
// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
#if !MI_TSAN
typedef union mi_page_flags_s {
uint8_t full_aligned;
struct {
uint8_t in_full : 1;
uint8_t has_aligned : 1;
} x;
} mi_page_flags_t;
#else
// under thread sanitizer, use a byte for each flag to suppress warning, issue #130
typedef union mi_page_flags_s {
uint32_t full_aligned;
struct {
uint8_t in_full;
uint8_t has_aligned;
} x;
} mi_page_flags_t;
#endif
// The `in_full` and `has_aligned` page flags are put in the same field
// to efficiently test if both are false (`full_aligned == 0`) in the `mi_free` routine.
#define MI_PAGE_IN_FULL_QUEUE MI_ZU(0x01)
#define MI_PAGE_HAS_ALIGNED MI_ZU(0x02)
typedef size_t mi_page_flags_t;
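
Keeping both flags in one word preserves the old fast-path property: mi_free can test "not in the full queue and no aligned blocks" with a single load and a compare against zero (see the free.c hunk further down). A minimal sketch of that combined test, with hypothetical names standing in for the real ones:

// Sketch (hypothetical names): both flags share one word, so the common free
// path needs only one comparison against zero.
#include <stdbool.h>
#include <stddef.h>

typedef size_t page_flags_t;
#define PAGE_IN_FULL_QUEUE ((size_t)0x01)
#define PAGE_HAS_ALIGNED   ((size_t)0x02)

static inline bool page_fast_free_path(page_flags_t flags) {
  // true only when the page is neither in the full queue nor has aligned blocks
  return ((flags & (PAGE_IN_FULL_QUEUE | PAGE_HAS_ALIGNED)) == 0);   // i.e. flags == 0
}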
// Thread free list.
// We use the bottom bit of the pointer for `mi_owned_t` flags
@@ -287,23 +272,21 @@ typedef struct mi_page_s {
uint16_t capacity; // number of blocks committed (must be the first field for proper zero-initialisation)
uint16_t reserved; // number of blocks reserved in memory
uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
uint8_t heap_tag; // tag of the owning heap, used to separate heaps by object type
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized
uint8_t retire_expire:7; // expiration count for retired blocks
// padding
uint8_t retire_expire; // expiration count for retired blocks
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
_Atomic(mi_page_flags_t) xflags; // `in_full` and `has_aligned` flags
size_t block_size; // size available in each block (always `>0`)
uint8_t* page_start; // start of the blocks
uint8_t heap_tag; // tag of the owning heap, used to separate heaps by object type
bool free_is_zero; // `true` if the blocks in the free list are zero initialized
// padding
#if (MI_ENCODE_FREELIST || MI_PADDING)
uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
#endif
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
mi_heap_t* heap; // heap this page belongs to
struct mi_page_s* next; // next page owned by the heap with the same `block_size`
struct mi_page_s* prev; // previous page owned by the heap with the same `block_size`
@@ -317,10 +300,10 @@ typedef struct mi_page_s {
// ------------------------------------------------------
#define MI_PAGE_ALIGN MI_ARENA_SLICE_ALIGN // pages must be aligned on this for the page map.
#define MI_PAGE_MIN_BLOCK_ALIGN (32) // minimal block alignment in a page
#define MI_PAGE_MIN_BLOCK_ALIGN (64) // minimal block alignment in a page
#define MI_PAGE_MAX_OVERALLOC_ALIGN MI_ARENA_SLICE_SIZE // (64 KiB) limit for which we overallocate in arena pages, beyond this use OS allocation
#if MI_DEBUG && MI_SIZE_SIZE == 8
#if (MI_ENCODE_FREELIST || MI_PADDING) && MI_SIZE_SIZE == 8
#define MI_PAGE_INFO_SIZE ((MI_INTPTR_SHIFT+2)*MI_PAGE_MIN_BLOCK_ALIGN) // >= sizeof(mi_page_t)
#else
#define MI_PAGE_INFO_SIZE ((MI_INTPTR_SHIFT+1)*MI_PAGE_MIN_BLOCK_ALIGN) // >= sizeof(mi_page_t)
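
For reference, assuming a 64-bit build where MI_INTPTR_SHIFT is 3 (per mimalloc's usual definitions) and the new MI_PAGE_MIN_BLOCK_ALIGN of 64, the constant works out to:

with MI_ENCODE_FREELIST or MI_PADDING:  MI_PAGE_INFO_SIZE = (3+2)*64 = 320 bytes
otherwise:                              MI_PAGE_INFO_SIZE = (3+1)*64 = 256 bytes

Either value must still cover sizeof(mi_page_t) rounded up to MI_PAGE_MIN_BLOCK_ALIGN, which the runtime check added in arena.c below now verifies instead of a debug-only assert.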

View file

@@ -596,7 +596,9 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz
}
}
#endif
mi_assert(MI_PAGE_INFO_SIZE >= _mi_align_up(sizeof(*page), MI_PAGE_MIN_BLOCK_ALIGN));
if (MI_PAGE_INFO_SIZE < _mi_align_up(sizeof(*page), MI_PAGE_MIN_BLOCK_ALIGN)) {
_mi_error_message(EFAULT, "fatal internal error: MI_PAGE_INFO_SIZE is too small\n");
};
const size_t block_start = (os_align ? MI_PAGE_ALIGN : MI_PAGE_INFO_SIZE);
const size_t reserved = (os_align ? 1 : (mi_size_of_slices(slice_count) - block_start) / block_size);
mi_assert_internal(reserved > 0 && reserved <= UINT16_MAX);
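
A worked example with illustrative numbers (one 64 KiB slice, 64-byte blocks, MI_PAGE_INFO_SIZE of 256, and no over-aligned OS allocation, so os_align is false):

block_start = MI_PAGE_INFO_SIZE       = 256
reserved    = (65536 - 256) / 64      = 1020 blocks

which satisfies the assertion that reserved is positive and fits in the 16-bit reserved field.
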
@@ -1126,8 +1128,8 @@ static size_t mi_debug_show_bfield(mi_bfield_t field, char* buf) {
return bit_set_count;
}
static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_t slice_count, mi_bitmap_t* bitmap, bool invert) {
_mi_output_message("%s%s:\n", prefix, header);
static size_t mi_debug_show_bitmap(const char* header, size_t slice_count, mi_bitmap_t* bitmap, bool invert) {
_mi_output_message("%s:\n", header);
size_t bit_count = 0;
size_t bit_set_count = 0;
for (size_t i = 0; i < mi_bitmap_chunk_count(bitmap) && bit_count < slice_count; i++) {
@@ -1135,19 +1137,13 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
size_t k = 0;
mi_bchunk_t* chunk = &bitmap->chunks[i];
if (i<10) { buf[k++] = ' '; }
if (i<100) { itoa((int)i, buf+k, 10); k += (i < 10 ? 1 : 2); }
buf[k++] = ' ';
if (i<10) { buf[k++] = ('0' + (char)i); buf[k++] = ' '; buf[k++] = ' '; }
else if (i<100) { buf[k++] = ('0' + (char)(i/10)); buf[k++] = ('0' + (char)(i%10)); buf[k++] = ' '; }
else if (i<1000) { buf[k++] = ('0' + (char)(i/100)); buf[k++] = ('0' + (char)((i%100)/10)); buf[k++] = ('0' + (char)(i%10)); }
for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) {
if (j > 0 && (j % 4) == 0) {
buf[k++] = '\n';
_mi_memcpy(buf+k, prefix, strlen(prefix)); k += strlen(prefix);
buf[k++] = ' ';
buf[k++] = ' ';
buf[k++] = ' ';
buf[k++] = ' ';
buf[k++] = ' ';
buf[k++] = '\n'; _mi_memset(buf+k,' ',5); k += 5;
}
if (bit_count < slice_count) {
mi_bfield_t bfield = chunk->bfields[j];
@@ -1164,9 +1160,9 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
}
bit_count += MI_BFIELD_BITS;
}
_mi_output_message("%s %s\n", prefix, buf);
_mi_output_message(" %s\n", buf);
}
_mi_output_message("%s total ('x'): %zu\n", prefix, bit_set_count);
_mi_output_message(" total ('x'): %zu\n", bit_set_count);
return bit_set_count;
}
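
The removed itoa call is replaced by manual digit formatting of the chunk index into a fixed 3-character, left-aligned field. A standalone sketch of the same formatting (not mimalloc code):

#include <stdio.h>

// Write index i (assumed < 1000) left-aligned into a 3-character field.
static int format_index(char* buf, size_t i) {
  int k = 0;
  if (i < 10)       { buf[k++] = (char)('0' + i); buf[k++] = ' '; buf[k++] = ' '; }
  else if (i < 100) { buf[k++] = (char)('0' + i/10); buf[k++] = (char)('0' + i%10); buf[k++] = ' '; }
  else              { buf[k++] = (char)('0' + i/100); buf[k++] = (char)('0' + (i%100)/10); buf[k++] = (char)('0' + i%10); }
  return k;  // always 3 characters written
}

int main(void) {
  char buf[4] = { 0 };
  format_index(buf, 42);
  printf("[%s]\n", buf);   // prints "[42 ]"
  return 0;
}
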
@@ -1183,12 +1179,12 @@ void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge)
slice_total += arena->slice_count;
_mi_output_message("arena %zu: %zu slices (%zu MiB)%s\n", i, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : ""));
if (show_inuse) {
free_total += mi_debug_show_bitmap(" ", "in-use slices", arena->slice_count, arena->slices_free, true);
free_total += mi_debug_show_bitmap("in-use slices", arena->slice_count, arena->slices_free, true);
}
mi_debug_show_bitmap(" ", "committed slices", arena->slice_count, arena->slices_committed, false);
mi_debug_show_bitmap("committed slices", arena->slice_count, arena->slices_committed, false);
// todo: abandoned slices
if (show_purge) {
purge_total += mi_debug_show_bitmap(" ", "purgeable slices", arena->slice_count, arena->slices_purge, false);
purge_total += mi_debug_show_bitmap("purgeable slices", arena->slice_count, arena->slices_purge, false);
}
}
if (show_inuse) _mi_output_message("total inuse slices : %zu\n", slice_total - free_total);

View file

@@ -805,10 +805,10 @@ static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx)
return false;
}
// record the max clear
size_t oldmax = mi_atomic_load_relaxed(&bitmap->chunk_max_clear);
/*size_t oldmax = mi_atomic_load_relaxed(&bitmap->chunk_max_clear);
do {
if mi_likely(chunk_idx <= oldmax) break;
} while (!mi_atomic_cas_weak_acq_rel(&bitmap->chunk_max_clear, &oldmax, chunk_idx));
} while (!mi_atomic_cas_weak_acq_rel(&bitmap->chunk_max_clear, &oldmax, chunk_idx));*/
return true;
}
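
The commented-out loop is the standard lock-free pattern for advancing a monotonic maximum; disabling it means chunk_max_clear is no longer maintained (and the corresponding load in the hunk below is disabled as well). For reference, a standalone sketch of that pattern in plain C11 (not mimalloc code):

#include <stdatomic.h>
#include <stddef.h>

// Advance *max to value, but never move it backwards, using a weak CAS loop.
static void update_max(_Atomic(size_t)* max, size_t value) {
  size_t oldmax = atomic_load_explicit(max, memory_order_relaxed);
  do {
    if (value <= oldmax) break;   // already at least as large; nothing to do
  } while (!atomic_compare_exchange_weak_explicit(max, &oldmax, value,
                                                  memory_order_acq_rel,
                                                  memory_order_acquire));
}
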
@@ -1046,7 +1046,7 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
{ \
/* start chunk index -- todo: can depend on the tseq to decrease contention between threads */ \
MI_UNUSED(tseq); \
const size_t chunk_max = mi_atomic_load_acquire(&bitmap->chunk_max_clear); /* mi_bitmap_chunk_count(bitmap) */ \
/* const size_t chunk_max = mi_atomic_load_acquire(&bitmap->chunk_max_clear); */ /* mi_bitmap_chunk_count(bitmap) */ \
const size_t chunk_start = 0; /* (chunk_max <= 1 ? 0 : (tseq % chunk_max)); */ /* space out threads */ \
const size_t chunkmap_max_bfield = _mi_divide_up( mi_bitmap_chunk_count(bitmap), MI_BCHUNK_BITS ); \
const size_t chunkmap_start = chunk_start / MI_BFIELD_BITS; \

View file

@@ -163,8 +163,9 @@ void mi_free(void* p) mi_attr_noexcept
if mi_unlikely(page==NULL) return;
const bool is_local = (_mi_prim_thread_id() == mi_page_thread_id(page));
const mi_page_flags_t flags = mi_page_flags(page);
if mi_likely(is_local) { // thread-local free?
if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
if mi_likely(flags == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
// thread-local, aligned, and not a full page
mi_block_t* const block = (mi_block_t*)p;
mi_free_block_local(page, block, true /* track stats */, false /* no need to check if the page is full */);
@@ -176,7 +177,7 @@ void mi_free(void* p) mi_attr_noexcept
}
else {
// free-ing in a page owned by a heap in another thread, or on abandoned page (not belonging to a heap)
if mi_likely(page->flags.full_aligned == 0) {
if mi_likely(flags == 0) {
// blocks are aligned (and not a full page)
mi_block_t* const block = (mi_block_t*)p;
mi_free_block_mt(page,block);

View file

@@ -20,21 +20,21 @@ const mi_page_t _mi_page_empty = {
0, // capacity
0, // reserved capacity
0, // block size shift
0, // heap tag
{ 0 }, // flags
false, // is_zero
0, // retire_expire
NULL, // local_free
MI_ATOMIC_VAR_INIT(0), // xthread_free
MI_ATOMIC_VAR_INIT(0), // xflags
0, // block_size
NULL, // page_start
0, // heap tag
false, // is_zero
#if (MI_PADDING || MI_ENCODE_FREELIST)
{ 0, 0 },
#endif
MI_ATOMIC_VAR_INIT(0), // xthread_free
NULL, // xheap
NULL, NULL, // next, prev
NULL, // subproc
{ {{ NULL, 0, 0}}, false, false, false, MI_MEM_NONE } // memid
{ {{ NULL, 0}}, false, false, false, MI_MEM_NONE } // memid
};
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)

View file

@@ -128,7 +128,7 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me
// different base? (due to alignment)
if (memid.mem.os.base != base) {
mi_assert(memid.mem.os.base <= addr);
mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr);
// mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr);
base = memid.mem.os.base;
if (memid.mem.os.size==0) { csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base); }
}
@@ -305,7 +305,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
if (p != NULL) {
*memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large);
memid->mem.os.base = os_base;
memid->mem.os.alignment = alignment;
// memid->mem.os.alignment = alignment;
memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned
}
return p;
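
With the alignment field gone, the base pointer and an adjusted size are enough to free an over-aligned OS allocation. A worked example with illustrative numbers, assuming _mi_memid_create_os records the requested size:

os_base = 0x10000, aligned pointer p = 0x14000, requested size = S
memid.mem.os.base = 0x10000
memid.mem.os.size = S + (p - os_base) = S + 0x4000   // covers [base, p + S)

On free, _mi_os_free_ex can then release the whole range starting at base; the alignment-based assertion in the hunk above is commented out because that field no longer exists.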

View file

@@ -40,7 +40,7 @@ static int ITER = 20;
static int THREADS = 8;
static int SCALE = 10;
static int ITER = 10;
#elif 0
#elif 1
static int THREADS = 4;
static int SCALE = 100;
static int ITER = 10;
@@ -347,6 +347,8 @@ int main(int argc, char** argv) {
mi_collect(true);
mi_debug_show_arenas(true,true,false);
#endif
mi_collect(true);
mi_debug_show_arenas(true, true, false);
// mi_stats_print(NULL);
#else
mi_stats_print(NULL); // so we see rss/commit/elapsed