Mirror of https://github.com/microsoft/mimalloc.git (synced 2025-05-06 15:29:31 +03:00)
fix page info size and order; atomic page flags
commit 659a9dd51d (parent 5a5943ad33)
10 changed files with 87 additions and 89 deletions
CMakeLists.txt

@@ -360,7 +360,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM
     list(APPEND mi_cflags_dynamic -ftls-model=initial-exec)
     message(STATUS "Use local dynamic TLS for the static build (since MI_LIBC_MUSL=ON)")
   else()
-    list(APPEND mi_cflags -ftls-model=initial-exec -march=haswell -mavx2)
+    list(APPEND mi_cflags -ftls-model=initial-exec -march=haswell -mavx2 -O2)
   endif()
 endif()
 if(MI_OVERRIDE)
include/mimalloc/atomic.h

@@ -80,10 +80,12 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_atomic_cas_strong_acq_rel(p,exp,des)  mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
 
 #define mi_atomic_add_relaxed(p,x)  mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed))
 #define mi_atomic_sub_relaxed(p,x)  mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
 #define mi_atomic_add_acq_rel(p,x)  mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel))
 #define mi_atomic_sub_acq_rel(p,x)  mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel))
+#define mi_atomic_and_relaxed(p,x)  mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(relaxed))
 #define mi_atomic_and_acq_rel(p,x)  mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel))
+#define mi_atomic_or_relaxed(p,x)   mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(relaxed))
 #define mi_atomic_or_acq_rel(p,x)   mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel))
 
 #define mi_atomic_increment_relaxed(p)  mi_atomic_add_relaxed(p,(uintptr_t)1)
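For reference, a minimal standalone sketch (not part of the commit) of what the new relaxed bit-mask macros expand to under C11 <stdatomic.h>; the `page_flags` variable and the 0x01 mask are illustrative only:

#include <stdatomic.h>
#include <assert.h>
#include <stddef.h>

int main(void) {
  _Atomic(size_t) page_flags = 0;  // stands in for an _Atomic(mi_page_flags_t) field

  // mi_atomic_or_relaxed(&page_flags, 0x01) expands to roughly:
  atomic_fetch_or_explicit(&page_flags, (size_t)0x01, memory_order_relaxed);

  // mi_atomic_and_relaxed(&page_flags, ~0x01) clears the bit again:
  atomic_fetch_and_explicit(&page_flags, ~(size_t)0x01, memory_order_relaxed);

  assert(atomic_load_explicit(&page_flags, memory_order_relaxed) == 0);
  return 0;
}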
include/mimalloc/internal.h

@@ -667,7 +667,8 @@ static inline void mi_page_clear_abandoned_mapped(mi_page_t* page) {
 
 
 static inline bool mi_page_is_huge(const mi_page_t* page) {
-  return (page->block_size > MI_LARGE_MAX_OBJ_SIZE || (mi_memkind_is_os(page->memid.memkind) && page->memid.mem.os.alignment > MI_PAGE_MAX_OVERALLOC_ALIGN));
+  return (page->block_size > MI_LARGE_MAX_OBJ_SIZE ||
+          (mi_memkind_is_os(page->memid.memkind) && page->memid.mem.os.base < (void*)page));
 }
 
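The rewritten test no longer needs the removed `alignment` field: when an OS allocation is over-aligned, the page struct sits at the aligned address inside the block, strictly above the raw OS base, which is what `base < (void*)page` appears to detect. A hedged standalone sketch of that address relationship (toy names, not mimalloc API):

#include <stdint.h>
#include <stdbool.h>
#include <assert.h>

// Illustrative only: an over-aligned allocation keeps the raw OS base below
// the aligned page address.
static bool is_over_aligned(void* os_base, void* page) {
  return os_base < page;
}

int main(void) {
  uint8_t block[4096];
  void* os_base = (void*)(block + 1);  // deliberately misaligned raw base
  uintptr_t aligned = ((uintptr_t)os_base + 1023) & ~(uintptr_t)1023;  // align up to 1 KiB
  void* page = (void*)aligned;         // page struct placed at the aligned address
  assert(is_over_aligned(os_base, page));
  return 0;
}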
@@ -727,20 +728,33 @@ static inline bool _mi_page_unown(mi_page_t* page) {
 //-----------------------------------------------------------
 // Page flags
 //-----------------------------------------------------------
+static inline mi_page_flags_t mi_page_flags(const mi_page_t* page) {
+  return mi_atomic_load_acquire(&page->xflags);
+}
+
+static inline void mi_page_flags_set(mi_page_t* page, bool set, mi_page_flags_t newflag) {
+  if (set) {
+    mi_atomic_or_acq_rel(&page->xflags, newflag);
+  }
+  else {
+    mi_atomic_and_acq_rel(&page->xflags, ~newflag);
+  }
+}
+
 static inline bool mi_page_is_in_full(const mi_page_t* page) {
-  return page->flags.x.in_full;
+  return ((mi_page_flags(page) & MI_PAGE_IN_FULL_QUEUE) != 0);
 }
 
 static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
-  page->flags.x.in_full = in_full;
+  mi_page_flags_set(page, in_full, MI_PAGE_IN_FULL_QUEUE);
 }
 
 static inline bool mi_page_has_aligned(const mi_page_t* page) {
-  return page->flags.x.has_aligned;
+  return ((mi_page_flags(page) & MI_PAGE_HAS_ALIGNED) != 0);
 }
 
 static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
-  page->flags.x.has_aligned = has_aligned;
+  mi_page_flags_set(page, has_aligned, MI_PAGE_HAS_ALIGNED);
 }
 
 /* -------------------------------------------------------------------
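A minimal self-contained sketch (toy names, plain C11, not from the commit) of the pattern these accessors implement: both flags share one atomic word, so a single acquire load tests `in_full` and `has_aligned` at once, which the `mi_free` fast path further below relies on.

#include <stdatomic.h>
#include <stdbool.h>
#include <assert.h>
#include <stddef.h>

#define IN_FULL_QUEUE ((size_t)0x01)  // mirrors MI_PAGE_IN_FULL_QUEUE
#define HAS_ALIGNED   ((size_t)0x02)  // mirrors MI_PAGE_HAS_ALIGNED

typedef struct toy_page_s {
  _Atomic(size_t) xflags;
} toy_page_t;

// Set or clear one flag bit, as mi_page_flags_set does with or/and acq_rel.
static void toy_flags_set(toy_page_t* page, bool set, size_t flag) {
  if (set) atomic_fetch_or_explicit(&page->xflags, flag, memory_order_acq_rel);
  else     atomic_fetch_and_explicit(&page->xflags, ~flag, memory_order_acq_rel);
}

int main(void) {
  toy_page_t page = { 0 };
  toy_flags_set(&page, true, IN_FULL_QUEUE);
  toy_flags_set(&page, true, HAS_ALIGNED);
  toy_flags_set(&page, false, IN_FULL_QUEUE);
  // one load tests both flags: zero means "not full and no aligned blocks"
  size_t flags = atomic_load_explicit(&page.xflags, memory_order_acquire);
  assert(flags == HAS_ALIGNED);
  return 0;
}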
include/mimalloc/types.h

@@ -167,8 +167,8 @@ static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
 
 typedef struct mi_memid_os_info {
   void*  base;            // actual base address of the block (used for offset aligned allocations)
-  size_t alignment;       // alignment at allocation
   size_t size;            // allocated full size
+  // size_t alignment;    // alignment at allocation
 } mi_memid_os_info_t;
 
 typedef struct mi_memid_arena_info {
@@ -224,26 +224,11 @@ typedef enum mi_owned_e {
 } mi_owned_t;
 
 
-// The `in_full` and `has_aligned` page flags are put in a union to efficiently
-// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
-#if !MI_TSAN
-typedef union mi_page_flags_s {
-  uint8_t full_aligned;
-  struct {
-    uint8_t in_full : 1;
-    uint8_t has_aligned : 1;
-  } x;
-} mi_page_flags_t;
-#else
-// under thread sanitizer, use a byte for each flag to suppress warning, issue #130
-typedef union mi_page_flags_s {
-  uint32_t full_aligned;
-  struct {
-    uint8_t in_full;
-    uint8_t has_aligned;
-  } x;
-} mi_page_flags_t;
-#endif
+// The `in_full` and `has_aligned` page flags are put in the same field
+// to efficiently test if both are false (`full_aligned == 0`) in the `mi_free` routine.
+#define MI_PAGE_IN_FULL_QUEUE  MI_ZU(0x01)
+#define MI_PAGE_HAS_ALIGNED    MI_ZU(0x02)
+typedef size_t mi_page_flags_t;
 
 // Thread free list.
 // We use the bottom bit of the pointer for `mi_owned_t` flags
@@ -287,23 +272,21 @@ typedef struct mi_page_s {
   uint16_t              capacity;          // number of blocks committed (must be the first field for proper zero-initialisation)
   uint16_t              reserved;          // number of blocks reserved in memory
   uint8_t               block_size_shift;  // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
-  uint8_t               heap_tag;          // tag of the owning heap, used to separate heaps by object type
-
-  mi_page_flags_t       flags;             // `in_full` and `has_aligned` flags (8 bits)
-  uint8_t               free_is_zero:1;    // `true` if the blocks in the free list are zero initialized
-  uint8_t               retire_expire:7;   // expiration count for retired blocks
-  // padding
+  uint8_t               retire_expire;     // expiration count for retired blocks
 
   mi_block_t*           local_free;        // list of deferred free blocks by this thread (migrates to `free`)
+  _Atomic(mi_thread_free_t) xthread_free;  // list of deferred free blocks freed by other threads
+  _Atomic(mi_page_flags_t)  xflags;        // `in_full` and `has_aligned` flags
+
   size_t                block_size;        // size available in each block (always `>0`)
   uint8_t*              page_start;        // start of the blocks
+  uint8_t               heap_tag;          // tag of the owning heap, used to separate heaps by object type
+  bool                  free_is_zero;      // `true` if the blocks in the free list are zero initialized
   // padding
 #if (MI_ENCODE_FREELIST || MI_PADDING)
   uintptr_t             keys[2];           // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
 #endif
-
-  _Atomic(mi_thread_free_t) xthread_free;  // list of deferred free blocks freed by other threads
 
   mi_heap_t*            heap;              // heap this threads belong to.
   struct mi_page_s*     next;              // next page owned by the heap with the same `block_size`
   struct mi_page_s*     prev;              // previous page owned by the heap with the same `block_size`
@@ -317,10 +300,10 @@ typedef struct mi_page_s {
 // ------------------------------------------------------
 
 #define MI_PAGE_ALIGN                MI_ARENA_SLICE_ALIGN  // pages must be aligned on this for the page map.
-#define MI_PAGE_MIN_BLOCK_ALIGN      (32)                  // minimal block alignment in a page
+#define MI_PAGE_MIN_BLOCK_ALIGN      (64)                  // minimal block alignment in a page
 #define MI_PAGE_MAX_OVERALLOC_ALIGN  MI_ARENA_SLICE_SIZE   // (64 KiB) limit for which we overallocate in arena pages, beyond this use OS allocation
 
-#if MI_DEBUG && MI_SIZE_SIZE == 8
+#if (MI_ENCODE_FREELIST || MI_PADDING) && MI_SIZE_SIZE == 8
 #define MI_PAGE_INFO_SIZE  ((MI_INTPTR_SHIFT+2)*MI_PAGE_MIN_BLOCK_ALIGN)  // >= sizeof(mi_page_t)
 #else
 #define MI_PAGE_INFO_SIZE  ((MI_INTPTR_SHIFT+1)*MI_PAGE_MIN_BLOCK_ALIGN)  // >= sizeof(mi_page_t)
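A quick sanity check of the new constants, assuming a 64-bit build where MI_INTPTR_SHIFT is 3 and MI_SIZE_SIZE is 8 (an assumption, not shown in the diff): with MI_PAGE_MIN_BLOCK_ALIGN raised to 64, the page info area becomes (3+2)*64 = 320 bytes when free lists are encoded or padded, and (3+1)*64 = 256 bytes otherwise.

#include <assert.h>

// Illustrative re-derivation of the new constants on a 64-bit build.
#define MI_INTPTR_SHIFT          3    // assumption: 64-bit, sizeof(intptr_t) == (1<<3)
#define MI_PAGE_MIN_BLOCK_ALIGN  64   // raised from 32 in this commit

int main(void) {
  int info_encoded = (MI_INTPTR_SHIFT + 2) * MI_PAGE_MIN_BLOCK_ALIGN;  // MI_ENCODE_FREELIST || MI_PADDING
  int info_plain   = (MI_INTPTR_SHIFT + 1) * MI_PAGE_MIN_BLOCK_ALIGN;
  assert(info_encoded == 320);
  assert(info_plain   == 256);
  return 0;
}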
src/arena.c
@@ -596,7 +596,9 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz
     }
   }
   #endif
-  mi_assert(MI_PAGE_INFO_SIZE >= _mi_align_up(sizeof(*page), MI_PAGE_MIN_BLOCK_ALIGN));
+  if (MI_PAGE_INFO_SIZE < _mi_align_up(sizeof(*page), MI_PAGE_MIN_BLOCK_ALIGN)) {
+    _mi_error_message(EFAULT, "fatal internal error: MI_PAGE_INFO_SIZE is too small\n");
+  };
   const size_t block_start = (os_align ? MI_PAGE_ALIGN : MI_PAGE_INFO_SIZE);
   const size_t reserved = (os_align ? 1 : (mi_size_of_slices(slice_count) - block_start) / block_size);
   mi_assert_internal(reserved > 0 && reserved <= UINT16_MAX);
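To see what the guarded computation produces, a worked example with illustrative numbers (a 64 KiB slice span, the 320-byte info size derived above, 64-byte blocks; none of these values are taken from the diff):

#include <assert.h>
#include <stddef.h>

int main(void) {
  size_t slice_bytes = 64 * 1024;  // stands in for mi_size_of_slices(slice_count)
  size_t block_start = 320;        // MI_PAGE_INFO_SIZE (non-os_align case)
  size_t block_size  = 64;
  size_t reserved = (slice_bytes - block_start) / block_size;
  assert(reserved == 1019);        // (65536 - 320) / 64
  assert(reserved > 0 && reserved <= 65535 /* UINT16_MAX */);
  return 0;
}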
@@ -1126,8 +1128,8 @@ static size_t mi_debug_show_bfield(mi_bfield_t field, char* buf) {
   return bit_set_count;
 }
 
-static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_t slice_count, mi_bitmap_t* bitmap, bool invert) {
-  _mi_output_message("%s%s:\n", prefix, header);
+static size_t mi_debug_show_bitmap(const char* header, size_t slice_count, mi_bitmap_t* bitmap, bool invert) {
+  _mi_output_message("%s:\n", header);
   size_t bit_count = 0;
   size_t bit_set_count = 0;
   for (size_t i = 0; i < mi_bitmap_chunk_count(bitmap) && bit_count < slice_count; i++) {
@@ -1135,19 +1137,13 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
     size_t k = 0;
     mi_bchunk_t* chunk = &bitmap->chunks[i];
 
-    if (i<10)  { buf[k++] = ' '; }
-    if (i<100) { itoa((int)i, buf+k, 10); k += (i < 10 ? 1 : 2); }
-    buf[k++] = ' ';
+    if (i<10)        { buf[k++] = ('0' + (char)i); buf[k++] = ' '; buf[k++] = ' '; }
+    else if (i<100)  { buf[k++] = ('0' + (char)(i/10)); buf[k++] = ('0' + (char)(i%10)); buf[k++] = ' '; }
+    else if (i<1000) { buf[k++] = ('0' + (char)(i/100)); buf[k++] = ('0' + (char)((i%100)/10)); buf[k++] = ('0' + (char)(i%10)); }
 
     for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) {
       if (j > 0 && (j % 4) == 0) {
-        buf[k++] = '\n';
-        _mi_memcpy(buf+k, prefix, strlen(prefix)); k += strlen(prefix);
-        buf[k++] = ' ';
-        buf[k++] = ' ';
-        buf[k++] = ' ';
-        buf[k++] = ' ';
-        buf[k++] = ' ';
+        buf[k++] = '\n'; _mi_memset(buf+k,' ',5); k += 5;
       }
       if (bit_count < slice_count) {
         mi_bfield_t bfield = chunk->bfields[j];
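The old code relied on itoa, which is not in standard C; the replacement emits up to three decimal digits by hand so each chunk row starts with a fixed three-character index. A standalone equivalent of that logic (helper name is illustrative):

#include <assert.h>
#include <string.h>
#include <stddef.h>

// Writes `i` (0..999) left-aligned into exactly three characters, as the
// rewritten mi_debug_show_bitmap now does inline.
static size_t write_index3(size_t i, char* buf) {
  size_t k = 0;
  if (i < 10)        { buf[k++] = '0' + (char)i;       buf[k++] = ' ';                      buf[k++] = ' '; }
  else if (i < 100)  { buf[k++] = '0' + (char)(i/10);  buf[k++] = '0' + (char)(i%10);       buf[k++] = ' '; }
  else if (i < 1000) { buf[k++] = '0' + (char)(i/100); buf[k++] = '0' + (char)((i%100)/10); buf[k++] = '0' + (char)(i%10); }
  return k;
}

int main(void) {
  char buf[4] = {0};
  write_index3(7, buf);   assert(memcmp(buf, "7  ", 3) == 0);
  write_index3(42, buf);  assert(memcmp(buf, "42 ", 3) == 0);
  write_index3(123, buf); assert(memcmp(buf, "123", 3) == 0);
  return 0;
}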
@@ -1164,9 +1160,9 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
       }
       bit_count += MI_BFIELD_BITS;
     }
-    _mi_output_message("%s %s\n", prefix, buf);
+    _mi_output_message(" %s\n", buf);
   }
-  _mi_output_message("%s total ('x'): %zu\n", prefix, bit_set_count);
+  _mi_output_message(" total ('x'): %zu\n", bit_set_count);
   return bit_set_count;
 }
 
@@ -1183,12 +1179,12 @@ void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge)
     slice_total += arena->slice_count;
     _mi_output_message("arena %zu: %zu slices (%zu MiB)%s\n", i, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : ""));
     if (show_inuse) {
-      free_total += mi_debug_show_bitmap("  ", "in-use slices", arena->slice_count, arena->slices_free, true);
+      free_total += mi_debug_show_bitmap("in-use slices", arena->slice_count, arena->slices_free, true);
     }
-    mi_debug_show_bitmap("  ", "committed slices", arena->slice_count, arena->slices_committed, false);
+    mi_debug_show_bitmap("committed slices", arena->slice_count, arena->slices_committed, false);
     // todo: abandoned slices
     if (show_purge) {
-      purge_total += mi_debug_show_bitmap("  ", "purgeable slices", arena->slice_count, arena->slices_purge, false);
+      purge_total += mi_debug_show_bitmap("purgeable slices", arena->slice_count, arena->slices_purge, false);
     }
   }
   if (show_inuse) _mi_output_message("total inuse slices : %zu\n", slice_total - free_total);
src/bitmap.c

@@ -805,10 +805,10 @@ static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx)
     return false;
   }
   // record the max clear
-  size_t oldmax = mi_atomic_load_relaxed(&bitmap->chunk_max_clear);
+  /*size_t oldmax = mi_atomic_load_relaxed(&bitmap->chunk_max_clear);
   do {
     if mi_likely(chunk_idx <= oldmax) break;
-  } while (!mi_atomic_cas_weak_acq_rel(&bitmap->chunk_max_clear, &oldmax, chunk_idx));
+  } while (!mi_atomic_cas_weak_acq_rel(&bitmap->chunk_max_clear, &oldmax, chunk_idx));*/
   return true;
 }
 
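The code disabled here is the classic lock-free "atomic max" idiom: reload and CAS until either the stored maximum is already large enough or the CAS succeeds. A minimal standalone version of the same pattern (toy names, plain C11, not from the commit):

#include <stdatomic.h>
#include <assert.h>
#include <stddef.h>

// Raise `*max` to at least `value`, tolerating concurrent updates.
static void atomic_max(_Atomic(size_t)* max, size_t value) {
  size_t oldmax = atomic_load_explicit(max, memory_order_relaxed);
  do {
    if (value <= oldmax) break;  // already large enough; nothing to do
  } while (!atomic_compare_exchange_weak_explicit(
               max, &oldmax, value, memory_order_acq_rel, memory_order_acquire));
}

int main(void) {
  _Atomic(size_t) chunk_max_clear = 3;
  atomic_max(&chunk_max_clear, 7);
  atomic_max(&chunk_max_clear, 5);  // no effect: 7 is already stored
  assert(atomic_load_explicit(&chunk_max_clear, memory_order_relaxed) == 7);
  return 0;
}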
@@ -1046,7 +1046,7 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
 { \
   /* start chunk index -- todo: can depend on the tseq to decrease contention between threads */ \
   MI_UNUSED(tseq); \
-  const size_t chunk_max = mi_atomic_load_acquire(&bitmap->chunk_max_clear); /* mi_bitmap_chunk_count(bitmap) */ \
+  /* const size_t chunk_max = mi_atomic_load_acquire(&bitmap->chunk_max_clear); */ /* mi_bitmap_chunk_count(bitmap) */ \
   const size_t chunk_start = 0; /* (chunk_max <= 1 ? 0 : (tseq % chunk_max)); */ /* space out threads */ \
   const size_t chunkmap_max_bfield = _mi_divide_up( mi_bitmap_chunk_count(bitmap), MI_BCHUNK_BITS ); \
   const size_t chunkmap_start = chunk_start / MI_BFIELD_BITS; \
src/free.c

@@ -163,8 +163,9 @@ void mi_free(void* p) mi_attr_noexcept
   if mi_unlikely(page==NULL) return;
 
   const bool is_local = (_mi_prim_thread_id() == mi_page_thread_id(page));
+  const mi_page_flags_t flags = mi_page_flags(page);
   if mi_likely(is_local) { // thread-local free?
-    if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
+    if mi_likely(flags == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
       // thread-local, aligned, and not a full page
       mi_block_t* const block = (mi_block_t*)p;
       mi_free_block_local(page, block, true /* track stats */, false /* no need to check if the page is full */);
@@ -176,7 +177,7 @@ void mi_free(void* p) mi_attr_noexcept
   }
   else {
     // free-ing in a page owned by a heap in another thread, or on abandoned page (not belonging to a heap)
-    if mi_likely(page->flags.full_aligned == 0) {
+    if mi_likely(flags == 0) {
       // blocks are aligned (and not a full page)
       mi_block_t* const block = (mi_block_t*)p;
       mi_free_block_mt(page,block);
src/init.c
@@ -20,21 +20,21 @@ const mi_page_t _mi_page_empty = {
   0,                     // capacity
   0,                     // reserved capacity
   0,                     // block size shift
-  0,                     // heap tag
-  { 0 },                 // flags
-  false,                 // is_zero
   0,                     // retire_expire
   NULL,                  // local_free
+  MI_ATOMIC_VAR_INIT(0), // xthread_free
+  MI_ATOMIC_VAR_INIT(0), // xflags
   0,                     // block_size
   NULL,                  // page_start
+  0,                     // heap tag
+  false,                 // is_zero
   #if (MI_PADDING || MI_ENCODE_FREELIST)
   { 0, 0 },
   #endif
-  MI_ATOMIC_VAR_INIT(0), // xthread_free
   NULL,                  // xheap
   NULL, NULL,            // next, prev
   NULL,                  // subproc
-  { {{ NULL, 0, 0}}, false, false, false, MI_MEM_NONE } // memid
+  { {{ NULL, 0}}, false, false, false, MI_MEM_NONE } // memid
 };
 
 #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
src/os.c
@@ -128,7 +128,7 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me
   // different base? (due to alignment)
   if (memid.mem.os.base != base) {
     mi_assert(memid.mem.os.base <= addr);
-    mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr);
+    // mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr);
     base = memid.mem.os.base;
     if (memid.mem.os.size==0) { csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base); }
   }
@@ -305,7 +305,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
   if (p != NULL) {
     *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large);
     memid->mem.os.base = os_base;
-    memid->mem.os.alignment = alignment;
+    // memid->mem.os.alignment = alignment;
     memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned
   }
   return p;
test/test-stress.c

@@ -40,7 +40,7 @@ static int ITER = 20;
 static int THREADS = 8;
 static int SCALE = 10;
 static int ITER = 10;
-#elif 0
+#elif 1
 static int THREADS = 4;
 static int SCALE = 100;
 static int ITER = 10;
@@ -347,6 +347,8 @@ int main(int argc, char** argv) {
   mi_collect(true);
   mi_debug_show_arenas(true,true,false);
 #endif
+  mi_collect(true);
+  mi_debug_show_arenas(true, true, false);
   // mi_stats_print(NULL);
 #else
   mi_stats_print(NULL); // so we see rss/commit/elapsed