merge from dev

commit fb07276d48 by daanx on 2023-04-18 17:33:01 -07:00
12 changed files with 253 additions and 241 deletions


@ -88,9 +88,9 @@ void _mi_thread_data_collect(void);
// os.c
void _mi_os_init(void); // called from process init
void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* stats);
void _mi_os_free(void* p, size_t size, mi_stats_t* stats);
void _mi_os_free_ex(void* p, size_t size, bool is_committed, mi_stats_t* stats);
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats);
void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats);
void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats);
size_t _mi_os_page_size(void);
size_t _mi_os_good_alloc_size(size_t size);
@ -106,16 +106,14 @@ bool _mi_os_unprotect(void* addr, size_t size);
bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats);
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats);
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats);
void _mi_os_free_aligned_at_offset(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats);
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats);
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats);
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size);
bool _mi_os_use_large_page(size_t size, size_t alignment);
size_t _mi_os_large_page_size(void);
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, bool* is_zero);
void _mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats);
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid);
// arena.c
mi_arena_id_t _mi_arena_id_none(void);
@ -271,6 +269,10 @@ bool _mi_page_is_valid(mi_page_t* page);
#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x)
#include <string.h>
// initialize a local variable to zero; use memset as compilers optimize constant sized memset's
#define _mi_memzero_var(x) memset(&x,0,sizeof(x))
// Is `x` a power of two? (0 is considered a power of two)
static inline bool _mi_is_power_of_two(uintptr_t x) {
return ((x & (x - 1)) == 0);
@ -313,7 +315,7 @@ static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
}
// Is memory zero initialized?
static inline bool mi_mem_is_zero(void* p, size_t size) {
static inline bool mi_mem_is_zero(const void* p, size_t size) {
for (size_t i = 0; i < size; i++) {
if (((uint8_t*)p)[i] != 0) return false;
}
@ -732,6 +734,28 @@ size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx);
/* -----------------------------------------------------------
memory id's
----------------------------------------------------------- */
static inline mi_memid_t _mi_memid_create(mi_memkind_t memkind) {
mi_memid_t memid;
_mi_memzero_var(memid);
memid.memkind = memkind;
return memid;
}
static inline mi_memid_t _mi_memid_none(void) {
return _mi_memid_create(MI_MEM_NONE);
}
static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero) {
mi_memid_t memid = _mi_memid_create(MI_MEM_OS);
memid.was_committed = committed;
memid.was_zero = is_zero;
return memid;
}
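A minimal sketch (not part of this commit; the helper name is made up and `mimalloc/internal.h` is assumed to be included) of how the new `mi_memid_t` plumbing pairs an OS allocation with its free, using the `_mi_os_alloc` and `_mi_os_free` declarations above:

  static void example_memid_roundtrip(mi_stats_t* stats) {
    mi_memid_t memid;
    void* p = _mi_os_alloc(64 * 1024, &memid, stats);   // on success memid.memkind == MI_MEM_OS
    if (p != NULL) {
      // memid.was_committed and memid.was_zero record the initial state of the block
      _mi_os_free(p, 64 * 1024, memid, stats);          // the free path dispatches on the memid
    }
  }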
// -------------------------------------------------------------------
// Fast "random" shuffle
@ -892,7 +916,6 @@ static inline size_t mi_bsr(uintptr_t x) {
#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h>
#include <string.h>
extern bool _mi_cpu_has_fsrm;
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
if (_mi_cpu_has_fsrm) {
@ -911,7 +934,6 @@ static inline void _mi_memzero(void* dst, size_t n) {
}
}
#else
#include <string.h>
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
memcpy(dst, src, n);
}
@ -920,9 +942,6 @@ static inline void _mi_memzero(void* dst, size_t n) {
}
#endif
// initialize a local variable to zero; use memset as compilers optimize constant sized memset's
#define _mi_memzero_var(x) memset(&x,0,sizeof(x))
// -------------------------------------------------------------------------------
// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
// This is used for example in `mi_realloc`.
@ -930,7 +949,6 @@ static inline void _mi_memzero(void* dst, size_t n) {
#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__)
// On GCC/CLang we provide a hint that the pointers are word aligned.
#include <string.h>
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);

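Illustrative only (the wrapper name is invented; internal headers assumed): a caller that knows both pointers are machine-word aligned, as heap blocks are, can use `_mi_memcpy_aligned`; otherwise `_mi_memcpy` is the safe fallback:

  static void example_copy(void* dst, const void* src, size_t n) {
    if (((uintptr_t)dst % MI_INTPTR_SIZE) == 0 && ((uintptr_t)src % MI_INTPTR_SIZE) == 0) {
      _mi_memcpy_aligned(dst, src, n);   // asserts word alignment internally
    }
    else {
      _mi_memcpy(dst, src, n);
    }
  }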

@ -293,7 +293,7 @@ typedef struct mi_page_s {
uint32_t slice_count; // slices in this page (0 if not a page)
uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
uint8_t is_committed : 1; // `true` if the page virtual memory is committed
uint8_t is_zero_init : 1; // `true` if the page was zero initialized
uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized
// layout like this to optimize access in `mi_malloc` and `mi_free`
uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
@ -377,12 +377,17 @@ typedef enum mi_memkind_e {
MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example)
MI_MEM_OS, // allocated from the OS
MI_MEM_OS_HUGE, // allocated as huge os pages
MI_MEM_OS_REMAP, // allocated in a remappable area (i.e. using `mremap`)
MI_MEM_ARENA // allocated from an arena (the usual case)
} mi_memkind_t;
static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP);
}
typedef struct mi_memid_os_info {
size_t alignment; // allocated with the given alignment
size_t align_offset; // the offset that was aligned (used only for huge aligned pages)
void* base; // actual base address of the block (used for offset aligned allocations)
} mi_memid_os_info_t;
typedef struct mi_memid_arena_info {

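For illustration only (mirroring the dispatch this commit introduces in `mi_arena_meta_free`; the function name is hypothetical): a release path can branch on the memid kind, treating all OS-backed kinds uniformly via `mi_memkind_is_os`:

  static void example_release(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats) {
    if (mi_memkind_is_os(memid.memkind)) {
      _mi_os_free(p, size, memid, stats);         // MI_MEM_OS, MI_MEM_OS_HUGE or MI_MEM_OS_REMAP
    }
    else {
      mi_assert(memid.memkind == MI_MEM_STATIC);  // static metadata: nothing to free
    }
  }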

@ -46,12 +46,17 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
// zero the block? note: we need to zero the full block size (issue #63)
if mi_unlikely(zero) {
mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic)
const size_t zsize = (page->is_zero ? sizeof(block->next) + MI_PADDING_SIZE : page->xblock_size);
_mi_memzero_aligned(block, zsize - MI_PADDING_SIZE);
if (page->is_zero) {
block->next = 0;
}
else {
mi_assert_internal(page->xblock_size >= MI_PADDING_SIZE);
_mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE);
}
}
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
if (!page->is_zero && !zero && !mi_page_is_huge(page)) {
if (!zero && !mi_page_is_huge(page)) {
memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page));
}
#elif (MI_SECURE!=0)
@ -110,6 +115,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
}
#endif
if (zero && p != NULL) { mi_assert_internal(mi_mem_is_zero(p, size)); }
return p;
}
@ -139,6 +145,7 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z
mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
}
#endif
if (zero && p != NULL) { mi_assert_internal(mi_mem_is_zero(p, size)); }
return p;
}
}


@ -41,18 +41,15 @@ typedef uintptr_t mi_block_info_t;
// A memory arena descriptor
typedef struct mi_arena_s {
mi_arena_id_t id; // arena id; 0 for non-specific
bool exclusive; // only allow allocations if specifically for this arena
bool owned; // if true, the arena will be released when the process exits if `mi_option_destroy_on_exit` is set.
mi_memid_t memid; // memid of the memory area
_Atomic(uint8_t*) start; // the start of the memory area
size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
size_t meta_size; // size of the arena structure itself (including its bitmaps)
mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation)
int numa_node; // associated NUMA node
bool is_zero_init; // is the arena zero initialized?
bool is_large; // large- or huge OS pages (always committed)
bool is_huge_alloc; // huge OS pages allocated by `_mi_os_alloc_huge_pages`
bool allow_decommit; // is decommit allowed? if true, is_large should be false and blocks_committed != NULL
bool exclusive; // only allow allocations if specifically for this arena
bool is_large; // memory area consists of large- or huge OS pages (always committed)
_Atomic(size_t) search_idx; // optimization to start the search for free blocks
_Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
@ -67,7 +64,7 @@ static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0
static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, bool is_large, bool is_huge_alloc, bool is_zero, int numa_node, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept;
//static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept;
/* -----------------------------------------------------------
Arena id's
@ -92,27 +89,6 @@ static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclus
(arena_id == req_arena_id));
}
/* -----------------------------------------------------------
memory id's
----------------------------------------------------------- */
static mi_memid_t mi_memid_create(mi_memkind_t memkind) {
mi_memid_t memid;
_mi_memzero_var(memid);
memid.memkind = memkind;
return memid;
}
static mi_memid_t mi_memid_none(void) {
return mi_memid_create(MI_MEM_NONE);
}
static mi_memid_t mi_memid_create_os(bool committed) {
mi_memid_t memid = mi_memid_create(MI_MEM_OS);
memid.was_committed = committed;
return memid;
}
bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) {
if (memid.memkind == MI_MEM_ARENA) {
return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id);
@ -126,7 +102,6 @@ bool _mi_arena_memid_is_os_allocated(mi_memid_t memid) {
return (memid.memkind == MI_MEM_OS);
}
/* -----------------------------------------------------------
Arena allocations get a (currently) 16-bit memory id where the
lower 8 bits are the arena id, and the upper bits the block index.
@ -145,7 +120,7 @@ static size_t mi_arena_size(mi_arena_t* arena) {
}
static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) {
mi_memid_t memid = mi_memid_create(MI_MEM_ARENA);
mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA);
memid.mem.arena.id = id;
memid.mem.arena.block_index = bitmap_index;
memid.mem.arena.is_exclusive = is_exclusive;
@ -172,7 +147,7 @@ static uint8_t mi_arena_static[MI_ARENA_STATIC_MAX];
static _Atomic(size_t) mi_arena_static_top;
static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) {
*memid = mi_memid_none();
*memid = _mi_memid_none();
if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL;
if (mi_atomic_load_relaxed(&mi_arena_static_top) >= MI_ARENA_STATIC_MAX) return NULL;
@ -189,7 +164,7 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m
}
// success
*memid = mi_memid_create(MI_MEM_STATIC);
*memid = _mi_memid_create(MI_MEM_STATIC);
const size_t start = _mi_align_up(oldtop, alignment);
uint8_t* const p = &mi_arena_static[start];
_mi_memzero(p, size);
@ -197,27 +172,22 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m
}
static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
*memid = mi_memid_none();
*memid = _mi_memid_none();
// try static
void* p = mi_arena_static_zalloc(size, MI_ALIGNMENT_MAX, memid);
if (p != NULL) return p;
// or fall back to the OS
bool is_zero = false;
p = _mi_os_alloc(size, &is_zero, stats);
if (p != NULL) {
*memid = mi_memid_create_os(true);
if (!is_zero) { _mi_memzero_aligned(p, size); }
return p;
}
return NULL;
return _mi_os_alloc(size, memid, stats);
}
static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_t* stats) {
if (memid.memkind == MI_MEM_OS) {
_mi_os_free(p, size, stats);
if (mi_memkind_is_os(memid.memkind)) {
_mi_os_free(p, size, memid, stats);
}
else {
mi_assert(memid.memkind == MI_MEM_STATIC);
}
}
@ -255,7 +225,7 @@ static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_
// claimed it!
void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index));
*memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index);
memid->is_pinned = (arena->is_large || !arena->allow_decommit);
memid->is_pinned = arena->memid.is_pinned;
// none of the claimed blocks should be scheduled for a decommit
if (arena->blocks_purge != NULL) {
@ -407,7 +377,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
{
mi_assert_internal(memid != NULL && tld != NULL);
mi_assert_internal(size > 0);
*memid = mi_memid_none();
*memid = _mi_memid_none();
const int numa_node = _mi_os_numa_node(tld); // current numa node
@ -432,17 +402,12 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
}
// finally, fall back to the OS
bool os_is_large = false;
bool os_is_zero = false;
void* p = _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, &os_is_large, &os_is_zero, tld->stats);
if (p != NULL) {
*memid = mi_memid_create_os(commit);
memid->is_pinned = os_is_large;
memid->was_zero = os_is_zero;
memid->mem.os.alignment = alignment;
memid->mem.os.align_offset = align_offset;
if (align_offset > 0) {
return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats);
}
else {
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats);
}
return p;
}
void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
@ -470,7 +435,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) {
mi_assert_internal(arena->blocks_committed != NULL);
mi_assert_internal(arena->blocks_purge != NULL);
mi_assert_internal(arena->allow_decommit);
mi_assert_internal(!arena->memid.is_pinned);
const size_t size = mi_arena_block_size(blocks);
void* const p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_idx));
bool needs_recommit;
@ -544,7 +509,7 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx,
// returns true if anything was purged
static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats)
{
if (!arena->allow_decommit || arena->blocks_purge == NULL) return false;
if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false;
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
if (expire == 0) return false;
if (!force && expire > now) return false;
@ -634,18 +599,13 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
if (size==0) return;
const bool all_committed = (committed_size == size);
if (memid.memkind == MI_MEM_OS) {
if (mi_memkind_is_os(memid.memkind)) {
// was a direct OS allocation, pass through
if (!all_committed && committed_size > 0) {
// if partially committed, adjust the committed stats
_mi_stat_decrease(&stats->committed, committed_size);
}
if (memid.mem.os.align_offset != 0) {
_mi_os_free_aligned_at_offset(p, size, memid.mem.os.alignment, memid.mem.os.align_offset, all_committed, stats);
}
else {
_mi_os_free(p, size, stats);
}
_mi_os_free(p, size, memid, stats);
}
else if (memid.memkind == MI_MEM_ARENA) {
// allocated in an arena
@ -672,7 +632,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
mi_track_mem_undefined(p,size);
// potentially decommit
if (!arena->allow_decommit || arena->blocks_committed == NULL) {
if (arena->memid.is_pinned || arena->blocks_committed == NULL) {
mi_assert_internal(all_committed);
}
else {
@ -720,14 +680,9 @@ static void mi_arenas_unsafe_destroy(void) {
for (size_t i = 0; i < max_arena; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
if (arena != NULL) {
if (arena->owned && arena->start != NULL) {
if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) {
mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
if (arena->is_huge_alloc) {
_mi_os_free_huge_os_pages(arena->start, mi_arena_size(arena), &_mi_stats_main);
}
else {
_mi_os_free(arena->start, mi_arena_size(arena), &_mi_stats_main);
}
_mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main);
}
else {
new_max_arena = i;
@ -787,21 +742,18 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) {
return true;
}
static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, bool is_large, bool is_huge_alloc, bool is_zero, int numa_node, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept
static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept
{
if (arena_id != NULL) *arena_id = _mi_arena_id_none();
if (size < MI_ARENA_BLOCK_SIZE) return false;
if (is_large) {
mi_assert_internal(is_committed);
is_committed = true;
mi_assert_internal(memid.was_committed && memid.is_pinned);
}
const bool allow_decommit = !is_large; // && !is_committed; // only allow decommit for initially uncommitted memory
const size_t bcount = size / MI_ARENA_BLOCK_SIZE;
const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
const size_t bitmaps = (allow_decommit ? 4 : 2);
const size_t bitmaps = (memid.is_pinned ? 2 : 4);
const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
mi_memid_t meta_memid;
mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
@ -810,8 +762,8 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed,
// already zero'd due to os_alloc
// _mi_memzero(arena, asize);
arena->id = _mi_arena_id_none();
arena->memid = memid;
arena->exclusive = exclusive;
arena->owned = owned;
arena->meta_size = asize;
arena->meta_memid = meta_memid;
arena->block_count = bcount;
@ -819,16 +771,13 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed,
arena->start = (uint8_t*)start;
arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
arena->is_large = is_large;
arena->is_huge_alloc= is_huge_alloc;
arena->is_zero_init = is_zero;
arena->allow_decommit = allow_decommit;
arena->purge_expire = 0;
arena->search_idx = 0;
arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
arena->blocks_committed = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
arena->blocks_purge = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap
arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap
// initialize committed bitmap?
if (arena->blocks_committed != NULL && is_committed) {
if (arena->blocks_committed != NULL && arena->memid.was_committed) {
memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning
}
@ -845,21 +794,23 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed,
}
bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
return mi_manage_os_memory_ex2(start,size,is_committed,is_large,false,is_zero,numa_node,exclusive,false /* not owned */, arena_id);
mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL);
memid.was_committed = is_committed;
memid.was_zero = is_zero;
memid.is_pinned = is_large;
return mi_manage_os_memory_ex2(start,size,is_large,numa_node,exclusive,memid, arena_id);
}
// Reserve a range of regular OS memory
static int mi_reserve_os_memory_ex2(size_t size, bool commit, bool allow_large, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept
{
int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
if (arena_id != NULL) *arena_id = _mi_arena_id_none();
size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block
bool is_large = false;
bool is_zero = false;
void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &is_large, &is_zero, &_mi_stats_main);
mi_memid_t memid;
void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &memid, &_mi_stats_main);
if (start == NULL) return ENOMEM;
if (!mi_manage_os_memory_ex2(start, size, (is_large || commit), is_large, false, is_zero, -1, exclusive, owned, arena_id)) {
_mi_os_free_ex(start, size, commit, &_mi_stats_main);
const bool is_large = memid.is_pinned; // todo: use separate is_large field?
if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) {
_mi_os_free_ex(start, size, commit, memid, &_mi_stats_main);
_mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size, 1024));
return ENOMEM;
}
@ -867,11 +818,6 @@ static int mi_reserve_os_memory_ex2(size_t size, bool commit, bool allow_large,
return 0;
}
// Reserve a range of regular OS memory
int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
return mi_reserve_os_memory_ex2(size,commit,allow_large,exclusive,true /*owned*/, arena_id);
}
// Manage a range of regular OS memory
bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept {
@ -928,16 +874,16 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m
if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
size_t hsize = 0;
size_t pages_reserved = 0;
bool is_zero = false;
void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &is_zero);
mi_memid_t memid;
void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid);
if (p==NULL || pages_reserved==0) {
_mi_warning_message("failed to reserve %zu GiB huge pages\n", pages);
return ENOMEM;
}
_mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages);
if (!mi_manage_os_memory_ex2(p, hsize, true, true, true, is_zero, numa_node, exclusive, true /* owned */, arena_id)) {
_mi_os_free_huge_os_pages(p, hsize, &_mi_stats_main);
if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) {
_mi_os_free(p, hsize, memid, &_mi_stats_main);
return ENOMEM;
}
return 0;

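Hypothetical usage of the public `mi_reserve_os_memory_ex` entry point reworked above (the size and wrapper name are arbitrary; include `mimalloc.h`): reserve a dedicated 64 MiB arena of regular OS memory up front:

  static void example_reserve_arena(void) {
    mi_arena_id_t arena_id;
    if (mi_reserve_os_memory_ex(64 * 1024 * 1024, true /* commit */, false /* allow_large */,
                                false /* exclusive */, &arena_id) == 0) {
      // subsequent allocations may now be served from this arena
    }
  }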

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2021 Microsoft Research, Daan Leijen
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -11,7 +11,6 @@ represented as an array of fields where each field is a machine word (`size_t`)
There are two api's; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
(this is used in region allocation)
The `_across` postfixed functions do allow sequences that can cross over
between the fields. (This is used in arena allocation)
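A minimal sketch of the two claim APIs described above (illustrative only; a tiny stack-local bitmap like this never occurs in mimalloc itself, and the by-value signature of `_mi_bitmap_unclaim_across` is assumed from its non-crossing counterpart):

  static void example_bitmap_claims(void) {
    mi_bitmap_field_t bits[4] = { 0 };          // 4 fields, all bits free
    mi_bitmap_index_t idx;
    // standard API: the claimed run stays within one field
    if (_mi_bitmap_try_find_from_claim(bits, 4, 0, 3, &idx)) {
      _mi_bitmap_unclaim(bits, 4, 3, idx);      // release the 3 claimed bits again
    }
    // `_across` API: the run may span field boundaries (used by the arena allocator)
    if (_mi_bitmap_try_find_from_claim_across(bits, 4, 0, MI_BITMAP_FIELD_BITS + 8, &idx)) {
      _mi_bitmap_unclaim_across(bits, 4, MI_BITMAP_FIELD_BITS + 8, idx);
    }
  }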
@ -63,12 +62,12 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons
// scan linearly for a free range of zero bits
while (bitidx <= bitidx_max) {
const size_t mapm = map & m;
const size_t mapm = (map & m);
if (mapm == 0) { // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
const size_t newmap = map | m;
const size_t newmap = (map | m);
mi_assert_internal((newmap^map) >> bitidx == mask);
if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here?
if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here?
// no success, another thread claimed concurrently.. keep going (with updated `map`)
continue;
}
@ -81,7 +80,8 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons
else {
// on to the next bit range
#ifdef MI_HAVE_FAST_BITSCAN
const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1);
mi_assert_internal(mapm != 0);
const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
mi_assert_internal(shift > 0 && shift <= count);
#else
const size_t shift = 1;
@ -100,7 +100,7 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
size_t idx = start_field_idx;
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
if (idx >= bitmap_fields) idx = 0; // wrap
if (idx >= bitmap_fields) { idx = 0; } // wrap
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
return true;
}
@ -127,14 +127,6 @@ bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap
return false;
}
/*
// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields.
bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) {
return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx);
}
*/
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
@ -143,7 +135,7 @@ bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count,
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
// mi_assert_internal((bitmap[idx] & mask) == mask);
size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
return ((prev & mask) == mask);
}
@ -157,7 +149,7 @@ bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
//mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
if (any_zero != NULL) *any_zero = ((prev & mask) != mask);
if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); }
return ((prev & mask) == 0);
}
@ -167,8 +159,8 @@ static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
if (any_ones != NULL) *any_ones = ((field & mask) != 0);
const size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
if (any_ones != NULL) { *any_ones = ((field & mask) != 0); }
return ((field & mask) == mask);
}
@ -179,10 +171,13 @@ bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
size_t expected = 0;
if (mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, mask)) return true;
size_t expected = mi_atomic_load_relaxed(&bitmap[idx]);
do {
if ((expected & mask) != 0) return false;
return mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask);
}
while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask));
mi_assert_internal((expected & mask) == 0);
return true;
}
@ -204,6 +199,7 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
// Try to atomically claim a sequence of `count` bits starting from the field
// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx)
{
mi_assert_internal(bitmap_idx != NULL);
@ -214,7 +210,7 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
const size_t initial = mi_clz(map); // count of initial zeros starting at idx
mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
if (initial == 0) return false;
if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields
if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us)
if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
// scan ahead
@ -224,25 +220,27 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
field++;
map = mi_atomic_load_relaxed(field);
const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS);
mask = mi_bitmap_mask_(mask_bits, 0);
if ((map & mask) != 0) return false;
if ((map & mask) != 0) return false; // some part is already claimed
found += mask_bits;
}
mi_assert_internal(field < &bitmap[bitmap_fields]);
// found range of zeros up to the final field; mask contains mask in the final field
// now claim it atomically
// we found a range of contiguous zeros up to the final field; mask contains mask in the final field
// now try to claim the range atomically
mi_bitmap_field_t* const final_field = field;
const size_t final_mask = mask;
mi_bitmap_field_t* const initial_field = &bitmap[idx];
const size_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial);
const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial;
const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx);
// initial field
size_t newmap;
field = initial_field;
map = mi_atomic_load_relaxed(field);
do {
newmap = map | initial_mask;
newmap = (map | initial_mask);
if ((map & initial_mask) != 0) { goto rollback; };
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
@ -257,31 +255,32 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
mi_assert_internal(field == final_field);
map = mi_atomic_load_relaxed(field);
do {
newmap = map | final_mask;
newmap = (map | final_mask);
if ((map & final_mask) != 0) { goto rollback; }
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
// claimed!
*bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial);
*bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
return true;
rollback:
// roll back intermediate fields
// (we just failed to claim `field` so decrement first)
while (--field > initial_field) {
newmap = 0;
map = MI_BITMAP_FIELD_FULL;
mi_assert_internal(mi_atomic_load_relaxed(field) == map);
mi_atomic_store_release(field, newmap);
}
if (field == initial_field) {
if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`)
map = mi_atomic_load_relaxed(field);
do {
mi_assert_internal((map & initial_mask) == initial_mask);
newmap = map & ~initial_mask;
newmap = (map & ~initial_mask);
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
}
// retry? (we make a recursive call instead of goto to be able to use const declarations)
if (retries < 4) {
if (retries <= 2) {
return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx);
}
else {
@ -294,17 +293,22 @@ rollback:
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
mi_assert_internal(count > 0);
if (count==1) return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
if (count <= 2) {
// we don't bother with crossover fields for small counts
return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
}
// visit the fields
size_t idx = start_field_idx;
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
if (idx >= bitmap_fields) idx = 0; // wrap
// try to claim inside the field
if (idx >= bitmap_fields) { idx = 0; } // wrap
// first try to claim inside a field
if (count <= MI_BITMAP_FIELD_BITS) {
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
return true;
}
}
// try to claim across fields
// if that fails, then try to claim across fields
if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) {
return true;
}
@ -347,14 +351,14 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_one = true;
mi_bitmap_field_t* field = &bitmap[idx];
size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask);
size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part
if ((prev & pre_mask) != pre_mask) all_one = false;
while(mid_count-- > 0) {
prev = mi_atomic_and_acq_rel(field++, ~mid_mask);
prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part
if ((prev & mid_mask) != mid_mask) all_one = false;
}
if (post_mask!=0) {
prev = mi_atomic_and_acq_rel(field, ~post_mask);
prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part
if ((prev & post_mask) != post_mask) all_one = false;
}
return all_one;
@ -384,7 +388,7 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co
if ((prev & post_mask) != 0) all_zero = false;
if ((prev & post_mask) != post_mask) any_zero = true;
}
if (pany_zero != NULL) *pany_zero = any_zero;
if (pany_zero != NULL) { *pany_zero = any_zero; }
return all_zero;
}
@ -413,7 +417,7 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field
if ((prev & post_mask) != post_mask) all_ones = false;
if ((prev & post_mask) != 0) any_ones = true;
}
if (pany_ones != NULL) *pany_ones = any_ones;
if (pany_ones != NULL) { *pany_ones = any_ones; }
return all_ones;
}


@ -203,6 +203,7 @@ mi_heap_t* _mi_heap_main_get(void) {
typedef struct mi_thread_data_s {
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
mi_tld_t tld;
mi_memid_t memid;
} mi_thread_data_t;
@ -231,15 +232,20 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) {
// if that fails, allocate as meta data
if (td == NULL) {
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &is_zero, &_mi_stats_main);
mi_memid_t memid;
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main);
if (td == NULL) {
// if this fails, try once more. (issue #257)
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &is_zero, &_mi_stats_main);
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main);
if (td == NULL) {
// really out of memory
_mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
}
}
if (td != NULL) {
td->memid = memid;
is_zero = memid.was_zero;
}
}
if (td != NULL && !is_zero) {
@ -260,7 +266,7 @@ static void mi_thread_data_free( mi_thread_data_t* tdfree ) {
}
}
// if that fails, just free it directly
_mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main);
_mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid, &_mi_stats_main);
}
void _mi_thread_data_collect(void) {
@ -270,7 +276,7 @@ void _mi_thread_data_collect(void) {
if (td != NULL) {
td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
if (td != NULL) {
_mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main );
_mi_os_free(td, sizeof(mi_thread_data_t), td->memid, &_mi_stats_main);
}
}
}

src/os.c

@ -137,7 +137,9 @@ void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
Free memory
-------------------------------------------------------------- */
static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) {
static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats);
static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) {
MI_UNUSED(tld_stats);
mi_assert_internal((size % _mi_os_page_size()) == 0);
if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr)
@ -146,18 +148,34 @@ static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats
_mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
}
mi_stats_t* stats = &_mi_stats_main;
if (was_committed) { _mi_stat_decrease(&stats->committed, size); }
if (still_committed) { _mi_stat_decrease(&stats->committed, size); }
_mi_stat_decrease(&stats->reserved, size);
}
void _mi_os_free_ex(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) {
const size_t csize = _mi_os_good_alloc_size(size);
mi_os_mem_free(addr,csize,was_committed,tld_stats);
void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) {
size_t csize = _mi_os_good_alloc_size(size);
if (memid.memkind == MI_MEM_OS) {
if (memid.mem.os.base != NULL) {
mi_assert(memid.mem.os.base <= addr);
csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base);
mi_os_prim_free(memid.mem.os.base, csize, still_committed, tld_stats);
}
else {
mi_os_prim_free(addr, csize, still_committed, tld_stats);
}
}
else if (memid.memkind == MI_MEM_OS_HUGE) {
mi_assert(memid.is_pinned);
mi_os_free_huge_os_pages(addr, size, tld_stats);
}
else {
// nothing to do
mi_assert(memid.memkind <= MI_MEM_EXTERNAL);
}
}
void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) {
_mi_os_free_ex(p, size, true, tld_stats);
void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) {
_mi_os_free_ex(p, size, true, memid, tld_stats);
}
@ -166,7 +184,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) {
-------------------------------------------------------------- */
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
mi_assert_internal(is_zero != NULL);
mi_assert_internal(is_large != NULL);
@ -191,7 +209,7 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo
// Primitive aligned allocation from the OS.
// This function guarantees the allocated memory is aligned.
static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0));
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
mi_assert_internal(is_large != NULL);
@ -201,19 +219,19 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
size = _mi_align_up(size, _mi_os_page_size());
// try first with a hint (this will be aligned directly on Win 10+ or BSD)
void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats);
void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats);
if (p == NULL) return NULL;
// if not aligned, free it, overallocate, and unmap around it
if (((uintptr_t)p % alignment != 0)) {
_mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
mi_os_mem_free(p, size, commit, stats);
mi_os_prim_free(p, size, commit, stats);
if (size >= (SIZE_MAX - alignment)) return NULL; // overflow
const size_t over_size = size + alignment;
if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocate block
// over-allocate uncommitted (virtual) memory
p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats);
p = mi_os_prim_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats);
if (p == NULL) return NULL;
// set p to the aligned part in the full region
@ -228,7 +246,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
}
else { // mmap can free inside an allocation
// overallocate...
p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
if (p == NULL) return NULL;
// and selectively unmap parts around the over-allocated area. (noop on sbrk)
void* aligned_p = mi_align_up_ptr(p, alignment);
@ -236,8 +254,8 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
size_t mid_size = _mi_align_up(size, _mi_os_page_size());
size_t post_size = over_size - pre_size - mid_size;
mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size);
if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats);
if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats);
if (pre_size > 0) mi_os_prim_free(p, pre_size, commit, stats);
if (post_size > 0) mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats);
// we can return the aligned pointer on `mmap` (and sbrk) systems
p = aligned_p;
}
@ -252,31 +270,38 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
OS API: alloc and alloc_aligned
----------------------------------------------------------- */
void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* tld_stats) {
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) {
MI_UNUSED(tld_stats);
*memid = _mi_memid_none();
mi_stats_t* stats = &_mi_stats_main;
if (size == 0) return NULL;
size = _mi_os_good_alloc_size(size);
bool is_large = false;
bool is_zerox = false;
void* p = mi_os_mem_alloc(size, 0, true, false, &is_large, &is_zerox, stats);
if (is_zero != NULL) { *is_zero = is_zerox; }
bool os_is_large = false;
bool os_is_zero = false;
void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats);
if (p != NULL) {
*memid = _mi_memid_create_os(true, os_is_zero);
memid->is_pinned = os_is_large;
}
return p;
}
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats)
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats)
{
MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings
MI_UNUSED(tld_stats);
*memid = _mi_memid_none();
if (size == 0) return NULL;
size = _mi_os_good_alloc_size(size);
alignment = _mi_align_up(alignment, _mi_os_page_size());
bool os_is_large = false;
bool os_is_zero = false;
void* p = mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &_mi_stats_main /*tld->stats*/ );
if (is_large != NULL) { *is_large = os_is_large; }
if (is_zero != NULL) { *is_zero = os_is_zero; }
void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &_mi_stats_main /*tld->stats*/ );
if (p != NULL) {
*memid = _mi_memid_create_os(commit, os_is_zero);
memid->is_pinned = os_is_large;
}
return p;
}
@ -288,22 +313,24 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
to use the actual start of the memory region.
----------------------------------------------------------- */
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) {
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) {
mi_assert(offset <= MI_SEGMENT_SIZE);
mi_assert(offset <= size);
mi_assert((alignment % _mi_os_page_size()) == 0);
*memid = _mi_memid_none();
if (offset > MI_SEGMENT_SIZE) return NULL;
if (offset == 0) {
// regular aligned allocation
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, is_large, is_zero, tld_stats);
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld_stats);
}
else {
// overallocate to align at an offset
const size_t extra = _mi_align_up(offset, alignment) - offset;
const size_t oversize = size + extra;
void* start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, is_large, is_zero, tld_stats);
void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats);
if (start == NULL) return NULL;
void* p = (uint8_t*)start + extra;
memid->mem.os.base = start;
void* const p = (uint8_t*)start + extra;
mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
// decommit the overallocation at the start
if (commit && extra > _mi_os_page_size()) {
@ -313,14 +340,6 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offse
}
}
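A hedged sketch (sizes and name are made up) of the aligned-at-offset round trip with the new memid, which makes the explicit `_mi_os_free_aligned_at_offset` below obsolete:

  static void example_offset_aligned(mi_stats_t* stats) {
    mi_memid_t memid;
    const size_t size = 1024*1024, alignment = 64*1024, offset = 16*1024;
    void* p = _mi_os_alloc_aligned_at_offset(size, alignment, offset,
                                             true /* commit */, false /* allow_large */,
                                             &memid, stats);
    if (p != NULL) {
      // p + offset is `alignment` aligned; memid.mem.os.base remembers the real start
      _mi_os_free(p, size, memid, stats);   // frees from base, adding the prefix back
    }
  }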
void _mi_os_free_aligned_at_offset(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) {
mi_assert(align_offset <= MI_SEGMENT_SIZE);
const size_t extra = _mi_align_up(align_offset, alignment) - align_offset;
void* start = (uint8_t*)p - extra;
_mi_os_free_ex(start, size + extra, was_committed, tld_stats);
}
/* -----------------------------------------------------------
OS memory API: reset, commit, decommit, protect, unprotect.
----------------------------------------------------------- */
@ -526,7 +545,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
#endif
// Allocate MI_SEGMENT_SIZE aligned huge pages
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, bool* is_zero) {
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) {
*memid = _mi_memid_none();
if (psize != NULL) *psize = 0;
if (pages_reserved != NULL) *pages_reserved = 0;
size_t size = 0;
@ -541,11 +561,11 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
bool all_zero = true;
while (page < pages) {
// allocate a page
bool is_zerox = false;
bool is_zero = false;
void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
void* p = NULL;
int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zerox, &p);
if (!is_zerox) { all_zero = false; }
int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p);
if (!is_zero) { all_zero = false; }
if (err != 0) {
_mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE);
break;
@ -556,7 +576,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
// no success, issue a warning and break
if (p != NULL) {
_mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr);
_mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main);
mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main);
}
break;
}
@ -584,17 +604,22 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
if (pages_reserved != NULL) { *pages_reserved = page; }
if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; }
if (is_zero != NULL) { *is_zero = all_zero; }
if (page != 0) {
mi_assert(start != NULL);
*memid = _mi_memid_create_os(true, all_zero);
memid->memkind = MI_MEM_OS_HUGE;
memid->is_pinned = true;
}
return (page == 0 ? NULL : start);
}
// free every huge page in a range individually (as we allocated per page)
// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems.
void _mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) {
static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) {
if (p==NULL || size==0) return;
uint8_t* base = (uint8_t*)p;
while (size >= MI_HUGE_OS_PAGE_SIZE) {
_mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats);
mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats);
size -= MI_HUGE_OS_PAGE_SIZE;
base += MI_HUGE_OS_PAGE_SIZE;
}

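For context (illustrative; `mi_reserve_huge_os_pages_at` is the existing public API from `mimalloc.h`, not part of this diff): reserving huge pages up front goes through `_mi_os_alloc_huge_os_pages` above and registers the range as an `MI_MEM_OS_HUGE` arena:

  static void example_reserve_huge(void) {
    // try to reserve 4 huge (1 GiB) OS pages on NUMA node 0, waiting at most 10 seconds
    if (mi_reserve_huge_os_pages_at(4, 0, 10000) != 0) {
      // reservation failed; mimalloc falls back to regular OS pages
    }
  }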

@ -641,11 +641,6 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
// enable the new free list
page->capacity += (uint16_t)extend;
mi_stat_increase(tld->stats.page_committed, extend * bsize);
// extension into zero initialized memory preserves the zero'd free list
if (!page->is_zero_init) {
page->is_zero = false;
}
mi_assert_expensive(mi_page_is_valid_init(page));
}
@ -671,10 +666,11 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
page->keys[0] = _mi_heap_random_next(heap);
page->keys[1] = _mi_heap_random_next(heap);
#endif
#if MI_DEBUG > 0
page->is_zero = false; // ensure in debug mode we initialize with MI_DEBUG_UNINIT, see issue #501
#else
page->is_zero = page->is_zero_init;
#if MI_DEBUG>1
if (page->is_zero_init) {
mi_assert(mi_mem_is_zero(page_start, page_size));
}
#endif
mi_assert_internal(page->is_committed);


@ -437,7 +437,7 @@ int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bo
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
*is_zero = true;
*is_zero = false;
*addr = NULL;
return ENOMEM;
}


@ -125,7 +125,7 @@ static mi_segment_t* _mi_segment_of(const void* p) {
// Is this a valid pointer in our heap?
static bool mi_is_valid_pointer(const void* p) {
return (_mi_segment_of(p) != NULL);
return ((_mi_segment_of(p) != NULL) || (_mi_arena_contains(p)));
}
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {


@ -998,7 +998,7 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld
// zero the page data, but not the segment fields
page->is_zero_init = false;
ptrdiff_t ofs = offsetof(mi_page_t, capacity);
memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs);
_mi_memzero((uint8_t*)page + ofs, sizeof(*page) - ofs);
page->xblock_size = 1;
// and free it


@ -20,6 +20,7 @@ terms of the MIT license.
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
//
@ -38,18 +39,18 @@ static int ITER = 50; // N full iterations destructing and re-creating a
#define STRESS // undefine for leak test
static bool allow_large_objects = true; // allow very large objects?
static bool allow_large_objects = false; // allow very large objects? (set to `true` if SCALE>100)
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
// #define USE_STD_MALLOC
#ifdef USE_STD_MALLOC
#define custom_calloc(n,s) malloc(n*s)
#define custom_calloc(n,s) calloc(n,s)
#define custom_realloc(p,s) realloc(p,s)
#define custom_free(p) free(p)
#else
#include <mimalloc.h>
#define custom_calloc(n,s) mi_malloc(n*s)
#define custom_calloc(n,s) mi_calloc(n,s)
#define custom_realloc(p,s) mi_realloc(p,s)
#define custom_free(p) mi_free(p)
#endif
@ -106,6 +107,7 @@ static void* alloc_items(size_t items, random_t r) {
uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t));
if (p != NULL) {
for (uintptr_t i = 0; i < items; i++) {
assert(p[i] == 0);
p[i] = (items - i) ^ cookie;
}
}
@ -247,7 +249,10 @@ int main(int argc, char** argv) {
long n = (strtol(argv[3], &end, 10));
if (n > 0) ITER = n;
}
printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER);
if (SCALE > 100) {
allow_large_objects = true;
}
printf("Using %d threads with a %d%% load-per-thread and %d iterations %s\n", THREADS, SCALE, ITER, (allow_large_objects ? "(allow large objects)" : ""));
//mi_reserve_os_memory(1024*1024*1024ULL, false, true);
//int res = mi_reserve_huge_os_pages(4,1);
//printf("(reserve huge: %i\n)", res);