merge from dev

daanx 2023-04-18 17:33:01 -07:00
commit fb07276d48
12 changed files with 253 additions and 241 deletions


@@ -88,9 +88,9 @@ void _mi_thread_data_collect(void);
 // os.c
 void _mi_os_init(void); // called from process init
-void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* stats);
-void _mi_os_free(void* p, size_t size, mi_stats_t* stats);
-void _mi_os_free_ex(void* p, size_t size, bool is_committed, mi_stats_t* stats);
+void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats);
+void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats);
+void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats);
 size_t _mi_os_page_size(void);
 size_t _mi_os_good_alloc_size(size_t size);
@@ -106,16 +106,14 @@ bool _mi_os_unprotect(void* addr, size_t size);
 bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats);
 bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats);
-void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats);
-void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats);
-void _mi_os_free_aligned_at_offset(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats);
+void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats);
+void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats);
 void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size);
 bool _mi_os_use_large_page(size_t size, size_t alignment);
 size_t _mi_os_large_page_size(void);
-void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, bool* is_zero);
-void _mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats);
+void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid);

 // arena.c
 mi_arena_id_t _mi_arena_id_none(void);
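The practical effect of the new signatures: instead of returning `is_zero`/`is_large` through separate out-parameters and asking the caller to remember `is_committed` at free time, the OS layer now round-trips a single `mi_memid_t`. A minimal sketch of a hypothetical caller (illustrative only, not part of this commit):

  mi_memid_t memid;
  void* p = _mi_os_alloc(4096, &memid, &_mi_stats_main);
  if (p != NULL) {
    if (!memid.was_zero) { _mi_memzero_aligned(p, 4096); }  // zero only if the OS did not
    // ... use p ...
    _mi_os_free(p, 4096, memid, &_mi_stats_main);           // memid says how to free (kind, base, pinned)
  }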
@ -271,6 +269,10 @@ bool _mi_page_is_valid(mi_page_t* page);
#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) #define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x)
#include <string.h>
// initialize a local variable to zero; use memset as compilers optimize constant sized memset's
#define _mi_memzero_var(x) memset(&x,0,sizeof(x))
// Is `x` a power of two? (0 is considered a power of two) // Is `x` a power of two? (0 is considered a power of two)
static inline bool _mi_is_power_of_two(uintptr_t x) { static inline bool _mi_is_power_of_two(uintptr_t x) {
return ((x & (x - 1)) == 0); return ((x & (x - 1)) == 0);
@@ -313,7 +315,7 @@ static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
 }

 // Is memory zero initialized?
-static inline bool mi_mem_is_zero(void* p, size_t size) {
+static inline bool mi_mem_is_zero(const void* p, size_t size) {
   for (size_t i = 0; i < size; i++) {
     if (((uint8_t*)p)[i] != 0) return false;
   }
@ -732,6 +734,28 @@ size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx);
/* -----------------------------------------------------------
memory id's
----------------------------------------------------------- */
static inline mi_memid_t _mi_memid_create(mi_memkind_t memkind) {
mi_memid_t memid;
_mi_memzero_var(memid);
memid.memkind = memkind;
return memid;
}
static inline mi_memid_t _mi_memid_none(void) {
return _mi_memid_create(MI_MEM_NONE);
}
static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero) {
mi_memid_t memid = _mi_memid_create(MI_MEM_OS);
memid.was_committed = committed;
memid.was_zero = is_zero;
return memid;
}
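These helpers replace the `static` `mi_memid_create*` functions that are removed from `arena.c` further down. A representative pattern at an OS allocation site (an illustrative sketch mirroring the flags used elsewhere in this commit):

  mi_memid_t memid = _mi_memid_create_os(true /*committed*/, os_is_zero);
  memid.is_pinned = os_is_large;   // large/huge OS pages are never decommitted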
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// Fast "random" shuffle // Fast "random" shuffle
@ -892,7 +916,6 @@ static inline size_t mi_bsr(uintptr_t x) {
#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h> #include <intrin.h>
#include <string.h>
extern bool _mi_cpu_has_fsrm; extern bool _mi_cpu_has_fsrm;
static inline void _mi_memcpy(void* dst, const void* src, size_t n) { static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
if (_mi_cpu_has_fsrm) { if (_mi_cpu_has_fsrm) {
@ -911,7 +934,6 @@ static inline void _mi_memzero(void* dst, size_t n) {
} }
} }
#else #else
#include <string.h>
static inline void _mi_memcpy(void* dst, const void* src, size_t n) { static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
memcpy(dst, src, n); memcpy(dst, src, n);
} }
@ -920,9 +942,6 @@ static inline void _mi_memzero(void* dst, size_t n) {
} }
#endif #endif
// initialize a local variable to zero; use memset as compilers optimize constant sized memset's
#define _mi_memzero_var(x) memset(&x,0,sizeof(x))
// ------------------------------------------------------------------------------- // -------------------------------------------------------------------------------
// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned // The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
// This is used for example in `mi_realloc`. // This is used for example in `mi_realloc`.
@ -930,7 +949,6 @@ static inline void _mi_memzero(void* dst, size_t n) {
#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) #if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__)
// On GCC/CLang we provide a hint that the pointers are word aligned. // On GCC/CLang we provide a hint that the pointers are word aligned.
#include <string.h>
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);


@@ -293,7 +293,7 @@ typedef struct mi_page_s {
   uint32_t slice_count; // slices in this page (0 if not a page)
   uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
   uint8_t is_committed : 1; // `true` if the page virtual memory is committed
-  uint8_t is_zero_init : 1; // `true` if the page was zero initialized
+  uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized

   // layout like this to optimize access in `mi_malloc` and `mi_free`
   uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
@@ -377,12 +377,17 @@ typedef enum mi_memkind_e {
   MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
   MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example)
   MI_MEM_OS, // allocated from the OS
+  MI_MEM_OS_HUGE, // allocated as huge os pages
+  MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`)
   MI_MEM_ARENA // allocated from an arena (the usual case)
 } mi_memkind_t;

+static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
+  return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP);
+}
+
 typedef struct mi_memid_os_info {
-  size_t alignment; // allocated with the given alignment
-  size_t align_offset; // the offset that was aligned (used only for huge aligned pages)
+  void* base; // actual base address of the block (used for offset aligned allocations)
 } mi_memid_os_info_t;

 typedef struct mi_memid_arena_info {
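Pieced together from the fields referenced throughout this diff (`memkind`, `was_committed`, `was_zero`, `is_pinned`, `mem.os.base`, `mem.arena.*`), the full descriptor looks roughly like this sketch; the exact field order and any further fields are an assumption:

  typedef struct mi_memid_s {
    union {
      mi_memid_os_info_t    os;      // only used for the MI_MEM_OS* kinds
      mi_memid_arena_info_t arena;   // only used for MI_MEM_ARENA
    } mem;
    bool         is_pinned;          // cannot be decommitted or reset (e.g. large/huge OS pages)
    bool         was_committed;      // the memory was committed when allocated
    bool         was_zero;           // the memory was zero-initialized when allocated
    mi_memkind_t memkind;
  } mi_memid_t;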


@@ -46,12 +46,17 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
   // zero the block? note: we need to zero the full block size (issue #63)
   if mi_unlikely(zero) {
     mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic)
-    const size_t zsize = (page->is_zero ? sizeof(block->next) + MI_PADDING_SIZE : page->xblock_size);
-    _mi_memzero_aligned(block, zsize - MI_PADDING_SIZE);
+    if (page->is_zero) {
+      block->next = 0;
+    }
+    else {
+      mi_assert_internal(page->xblock_size >= MI_PADDING_SIZE);
+      _mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE);
+    }
   }

 #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
-  if (!page->is_zero && !zero && !mi_page_is_huge(page)) {
+  if (!zero && !mi_page_is_huge(page)) {
     memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page));
   }
 #elif (MI_SECURE!=0)
@ -110,6 +115,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
} }
#endif #endif
if (zero && p != NULL) { mi_assert_internal(mi_mem_is_zero(p, size)); }
return p; return p;
} }
@ -139,6 +145,7 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z
mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
} }
#endif #endif
if (zero && p != NULL) { mi_assert_internal(mi_mem_is_zero(p, size)); }
return p; return p;
} }
} }


@@ -41,18 +41,15 @@ typedef uintptr_t mi_block_info_t;
 // A memory arena descriptor
 typedef struct mi_arena_s {
   mi_arena_id_t id;                 // arena id; 0 for non-specific
-  bool exclusive;                   // only allow allocations if specifically for this arena
-  bool owned;                       // if true, the arena will be released when the process exits if `mi_option_destroy_on_exit` is set.
+  mi_memid_t memid;                 // memid of the memory area
   _Atomic(uint8_t*) start;          // the start of the memory area
   size_t block_count;               // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
   size_t field_count;               // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
   size_t meta_size;                 // size of the arena structure itself (including its bitmaps)
   mi_memid_t meta_memid;            // memid of the arena structure itself (OS or static allocation)
   int numa_node;                    // associated NUMA node
-  bool is_zero_init;                // is the arena zero initialized?
-  bool is_large;                    // large- or huge OS pages (always committed)
-  bool is_huge_alloc;               // huge OS pages allocated by `_mi_os_alloc_huge_pages`
-  bool allow_decommit;              // is decommit allowed? if true, is_large should be false and blocks_committed != NULL
+  bool exclusive;                   // only allow allocations if specifically for this arena
+  bool is_large;                    // memory area consists of large- or huge OS pages (always committed)
   _Atomic(size_t) search_idx;       // optimization to start the search for free blocks
   _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
   mi_bitmap_field_t* blocks_dirty;  // are the blocks potentially non-zero?
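The removed per-arena booleans are not lost; later hunks read the same information back from the embedded `memid`, roughly as follows (a summary of the substitutions visible below, not code from the commit):

  // allow_decommit  ->  !arena->memid.is_pinned                  (mi_arena_purge, mi_arena_try_purge)
  // owned           ->  mi_memkind_is_os(arena->memid.memkind)   (mi_arenas_unsafe_destroy)
  // is_huge_alloc   ->  folded into memid.memkind and handled inside _mi_os_free
  // is_zero_init    ->  presumably arena->memid.was_zero (not visible in these hunks)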
@ -67,7 +64,7 @@ static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0
static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, bool is_large, bool is_huge_alloc, bool is_zero, int numa_node, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept; //static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept;
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Arena id's Arena id's
@ -92,27 +89,6 @@ static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclus
(arena_id == req_arena_id)); (arena_id == req_arena_id));
} }
/* -----------------------------------------------------------
memory id's
----------------------------------------------------------- */
static mi_memid_t mi_memid_create(mi_memkind_t memkind) {
mi_memid_t memid;
_mi_memzero_var(memid);
memid.memkind = memkind;
return memid;
}
static mi_memid_t mi_memid_none(void) {
return mi_memid_create(MI_MEM_NONE);
}
static mi_memid_t mi_memid_create_os(bool committed) {
mi_memid_t memid = mi_memid_create(MI_MEM_OS);
memid.was_committed = committed;
return memid;
}
bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) {
if (memid.memkind == MI_MEM_ARENA) { if (memid.memkind == MI_MEM_ARENA) {
return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id); return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id);
@ -126,7 +102,6 @@ bool _mi_arena_memid_is_os_allocated(mi_memid_t memid) {
return (memid.memkind == MI_MEM_OS); return (memid.memkind == MI_MEM_OS);
} }
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Arena allocations get a (currently) 16-bit memory id where the Arena allocations get a (currently) 16-bit memory id where the
lower 8 bits are the arena id, and the upper bits the block index. lower 8 bits are the arena id, and the upper bits the block index.
@ -145,7 +120,7 @@ static size_t mi_arena_size(mi_arena_t* arena) {
} }
static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) { static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) {
mi_memid_t memid = mi_memid_create(MI_MEM_ARENA); mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA);
memid.mem.arena.id = id; memid.mem.arena.id = id;
memid.mem.arena.block_index = bitmap_index; memid.mem.arena.block_index = bitmap_index;
memid.mem.arena.is_exclusive = is_exclusive; memid.mem.arena.is_exclusive = is_exclusive;
@ -172,7 +147,7 @@ static uint8_t mi_arena_static[MI_ARENA_STATIC_MAX];
static _Atomic(size_t) mi_arena_static_top; static _Atomic(size_t) mi_arena_static_top;
static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) { static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) {
*memid = mi_memid_none(); *memid = _mi_memid_none();
if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL; if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL;
if (mi_atomic_load_relaxed(&mi_arena_static_top) >= MI_ARENA_STATIC_MAX) return NULL; if (mi_atomic_load_relaxed(&mi_arena_static_top) >= MI_ARENA_STATIC_MAX) return NULL;
@ -189,7 +164,7 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m
} }
// success // success
*memid = mi_memid_create(MI_MEM_STATIC); *memid = _mi_memid_create(MI_MEM_STATIC);
const size_t start = _mi_align_up(oldtop, alignment); const size_t start = _mi_align_up(oldtop, alignment);
uint8_t* const p = &mi_arena_static[start]; uint8_t* const p = &mi_arena_static[start];
_mi_memzero(p, size); _mi_memzero(p, size);
@@ -197,27 +172,22 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m
 }

 static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
-  *memid = mi_memid_none();
+  *memid = _mi_memid_none();

   // try static
   void* p = mi_arena_static_zalloc(size, MI_ALIGNMENT_MAX, memid);
   if (p != NULL) return p;

   // or fall back to the OS
-  bool is_zero = false;
-  p = _mi_os_alloc(size, &is_zero, stats);
-  if (p != NULL) {
-    *memid = mi_memid_create_os(true);
-    if (!is_zero) { _mi_memzero_aligned(p, size); }
-    return p;
-  }
-  return NULL;
+  return _mi_os_alloc(size, memid, stats);
 }

 static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_t* stats) {
-  if (memid.memkind == MI_MEM_OS) {
-    _mi_os_free(p, size, stats);
+  if (mi_memkind_is_os(memid.memkind)) {
+    _mi_os_free(p, size, memid, stats);
+  }
+  else {
+    mi_assert(memid.memkind == MI_MEM_STATIC);
   }
 }
@ -255,7 +225,7 @@ static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_
// claimed it! // claimed it!
void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index)); void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index));
*memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index); *memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index);
memid->is_pinned = (arena->is_large || !arena->allow_decommit); memid->is_pinned = arena->memid.is_pinned;
// none of the claimed blocks should be scheduled for a decommit // none of the claimed blocks should be scheduled for a decommit
if (arena->blocks_purge != NULL) { if (arena->blocks_purge != NULL) {
@ -407,7 +377,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
{ {
mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(memid != NULL && tld != NULL);
mi_assert_internal(size > 0); mi_assert_internal(size > 0);
*memid = mi_memid_none(); *memid = _mi_memid_none();
const int numa_node = _mi_os_numa_node(tld); // current numa node const int numa_node = _mi_os_numa_node(tld); // current numa node
@@ -432,17 +402,12 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
   }

   // finally, fall back to the OS
-  bool os_is_large = false;
-  bool os_is_zero = false;
-  void* p = _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, &os_is_large, &os_is_zero, tld->stats);
-  if (p != NULL) {
-    *memid = mi_memid_create_os(commit);
-    memid->is_pinned = os_is_large;
-    memid->was_zero = os_is_zero;
-    memid->mem.os.alignment = alignment;
-    memid->mem.os.align_offset = align_offset;
-  }
-  return p;
+  if (align_offset > 0) {
+    return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats);
+  }
+  else {
+    return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats);
+  }
 }

 void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
@ -470,7 +435,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) {
mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_committed != NULL);
mi_assert_internal(arena->blocks_purge != NULL); mi_assert_internal(arena->blocks_purge != NULL);
mi_assert_internal(arena->allow_decommit); mi_assert_internal(!arena->memid.is_pinned);
const size_t size = mi_arena_block_size(blocks); const size_t size = mi_arena_block_size(blocks);
void* const p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_idx)); void* const p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_idx));
bool needs_recommit; bool needs_recommit;
@ -544,7 +509,7 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx,
// returns true if anything was purged // returns true if anything was purged
static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats) static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats)
{ {
if (!arena->allow_decommit || arena->blocks_purge == NULL) return false; if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false;
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
if (expire == 0) return false; if (expire == 0) return false;
if (!force && expire > now) return false; if (!force && expire > now) return false;
@@ -634,18 +599,13 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
   if (size==0) return;
   const bool all_committed = (committed_size == size);

-  if (memid.memkind == MI_MEM_OS) {
+  if (mi_memkind_is_os(memid.memkind)) {
     // was a direct OS allocation, pass through
     if (!all_committed && committed_size > 0) {
       // if partially committed, adjust the committed stats
       _mi_stat_decrease(&stats->committed, committed_size);
     }
-    if (memid.mem.os.align_offset != 0) {
-      _mi_os_free_aligned_at_offset(p, size, memid.mem.os.alignment, memid.mem.os.align_offset, all_committed, stats);
-    }
-    else {
-      _mi_os_free(p, size, stats);
-    }
+    _mi_os_free(p, size, memid, stats);
   }
   else if (memid.memkind == MI_MEM_ARENA) {
     // allocated in an arena
@ -672,7 +632,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
mi_track_mem_undefined(p,size); mi_track_mem_undefined(p,size);
// potentially decommit // potentially decommit
if (!arena->allow_decommit || arena->blocks_committed == NULL) { if (arena->memid.is_pinned || arena->blocks_committed == NULL) {
mi_assert_internal(all_committed); mi_assert_internal(all_committed);
} }
else { else {
@ -720,14 +680,9 @@ static void mi_arenas_unsafe_destroy(void) {
for (size_t i = 0; i < max_arena; i++) { for (size_t i = 0; i < max_arena; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
if (arena != NULL) { if (arena != NULL) {
if (arena->owned && arena->start != NULL) { if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) {
mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
if (arena->is_huge_alloc) { _mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main);
_mi_os_free_huge_os_pages(arena->start, mi_arena_size(arena), &_mi_stats_main);
}
else {
_mi_os_free(arena->start, mi_arena_size(arena), &_mi_stats_main);
}
} }
else { else {
new_max_arena = i; new_max_arena = i;
@@ -787,21 +742,18 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) {
   return true;
 }

-static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, bool is_large, bool is_huge_alloc, bool is_zero, int numa_node, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept
+static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept
 {
   if (arena_id != NULL) *arena_id = _mi_arena_id_none();
   if (size < MI_ARENA_BLOCK_SIZE) return false;

   if (is_large) {
-    mi_assert_internal(is_committed);
-    is_committed = true;
+    mi_assert_internal(memid.was_committed && memid.is_pinned);
   }

-  const bool allow_decommit = !is_large; // && !is_committed; // only allow decommit for initially uncommitted memory
   const size_t bcount = size / MI_ARENA_BLOCK_SIZE;
   const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
-  const size_t bitmaps = (allow_decommit ? 4 : 2);
+  const size_t bitmaps = (memid.is_pinned ? 2 : 4);
   const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
   mi_memid_t meta_memid;
   mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
@ -810,8 +762,8 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed,
// already zero'd due to os_alloc // already zero'd due to os_alloc
// _mi_memzero(arena, asize); // _mi_memzero(arena, asize);
arena->id = _mi_arena_id_none(); arena->id = _mi_arena_id_none();
arena->memid = memid;
arena->exclusive = exclusive; arena->exclusive = exclusive;
arena->owned = owned;
arena->meta_size = asize; arena->meta_size = asize;
arena->meta_memid = meta_memid; arena->meta_memid = meta_memid;
arena->block_count = bcount; arena->block_count = bcount;
@ -819,16 +771,13 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed,
arena->start = (uint8_t*)start; arena->start = (uint8_t*)start;
arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
arena->is_large = is_large; arena->is_large = is_large;
arena->is_huge_alloc= is_huge_alloc;
arena->is_zero_init = is_zero;
arena->allow_decommit = allow_decommit;
arena->purge_expire = 0; arena->purge_expire = 0;
arena->search_idx = 0; arena->search_idx = 0;
arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
arena->blocks_committed = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
arena->blocks_purge = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap
// initialize committed bitmap? // initialize committed bitmap?
if (arena->blocks_committed != NULL && is_committed) { if (arena->blocks_committed != NULL && arena->memid.was_committed) {
memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning
} }
@@ -845,31 +794,28 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed,
 }

 bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
-  return mi_manage_os_memory_ex2(start,size,is_committed,is_large,false,is_zero,numa_node,exclusive,false /* not owned */, arena_id);
-}
-
-// Reserve a range of regular OS memory
-static int mi_reserve_os_memory_ex2(size_t size, bool commit, bool allow_large, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept
-{
-  if (arena_id != NULL) *arena_id = _mi_arena_id_none();
-  size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block
-  bool is_large = false;
-  bool is_zero = false;
-  void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &is_large, &is_zero, &_mi_stats_main);
-  if (start==NULL) return ENOMEM;
-  if (!mi_manage_os_memory_ex2(start, size, (is_large || commit), is_large, false, is_zero, -1, exclusive, owned, arena_id)) {
-    _mi_os_free_ex(start, size, commit, &_mi_stats_main);
-    _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024));
-    return ENOMEM;
-  }
-  _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size,1024), is_large ? " (in large os pages)" : "");
-  return 0;
+  mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL);
+  memid.was_committed = is_committed;
+  memid.was_zero = is_zero;
+  memid.is_pinned = is_large;
+  return mi_manage_os_memory_ex2(start,size,is_large,numa_node,exclusive,memid, arena_id);
 }

 // Reserve a range of regular OS memory
 int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
-  return mi_reserve_os_memory_ex2(size,commit,allow_large,exclusive,true /*owned*/, arena_id);
+  if (arena_id != NULL) *arena_id = _mi_arena_id_none();
+  size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block
+  mi_memid_t memid;
+  void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &memid, &_mi_stats_main);
+  if (start == NULL) return ENOMEM;
+  const bool is_large = memid.is_pinned; // todo: use separate is_large field?
+  if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) {
+    _mi_os_free_ex(start, size, commit, memid, &_mi_stats_main);
+    _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size, 1024));
+    return ENOMEM;
+  }
+  _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : "");
+  return 0;
 }
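The public signature of `mi_reserve_os_memory_ex` is unchanged; a hypothetical pairing with an exclusive arena (the `mi_heap_new_in_arena` call is an assumption and not part of this diff):

  mi_arena_id_t arena_id;
  if (mi_reserve_os_memory_ex(256 * 1024 * 1024, true /*commit*/, false /*allow_large*/,
                              true /*exclusive*/, &arena_id) == 0) {
    mi_heap_t* heap = mi_heap_new_in_arena(arena_id);  // allocate only from this arena
    void* p = mi_heap_malloc(heap, 64);
    mi_free(p);
  }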
@ -928,16 +874,16 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m
if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
size_t hsize = 0; size_t hsize = 0;
size_t pages_reserved = 0; size_t pages_reserved = 0;
bool is_zero = false; mi_memid_t memid;
void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &is_zero); void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid);
if (p==NULL || pages_reserved==0) { if (p==NULL || pages_reserved==0) {
_mi_warning_message("failed to reserve %zu GiB huge pages\n", pages); _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages);
return ENOMEM; return ENOMEM;
} }
_mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages);
if (!mi_manage_os_memory_ex2(p, hsize, true, true, true, is_zero, numa_node, exclusive, true /* owned */, arena_id)) { if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) {
_mi_os_free_huge_os_pages(p, hsize, &_mi_stats_main); _mi_os_free(p, hsize, memid, &_mi_stats_main);
return ENOMEM; return ENOMEM;
} }
return 0; return 0;
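With the memid now carrying `MI_MEM_OS_HUGE`, a huge-page reservation is released through the ordinary `_mi_os_free` path shown above. A hypothetical use of the public wrapper (assuming the non-`_ex` wrapper `mi_reserve_huge_os_pages_at(pages, numa_node, timeout_msecs)` fronts this function, as in the released API):

  // reserve 4 x 1 GiB huge pages on NUMA node 0, waiting at most 2000 msecs (illustrative)
  if (mi_reserve_huge_os_pages_at(4, 0, 2000) != 0) {
    // fall back to regular pages
  }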


@ -1,5 +1,5 @@
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
Copyright (c) 2019-2021 Microsoft Research, Daan Leijen Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution. "LICENSE" at the root of this distribution.
@ -11,7 +11,6 @@ represeted as an array of fields where each field is a machine word (`size_t`)
There are two api's; the standard one cannot have sequences that cross There are two api's; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
(this is used in region allocation)
The `_across` postfixed functions do allow sequences that can cross over The `_across` postfixed functions do allow sequences that can cross over
between the fields. (This is used in arena allocation) between the fields. (This is used in arena allocation)
@@ -63,12 +62,12 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons
   // scan linearly for a free range of zero bits
   while (bitidx <= bitidx_max) {
-    const size_t mapm = map & m;
+    const size_t mapm = (map & m);
     if (mapm == 0) { // are the mask bits free at bitidx?
       mi_assert_internal((m >> bitidx) == mask); // no overflow?
-      const size_t newmap = map | m;
+      const size_t newmap = (map | m);
       mi_assert_internal((newmap^map) >> bitidx == mask);
-      if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here?
+      if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here?
         // no success, another thread claimed concurrently.. keep going (with updated `map`)
         continue;
       }
@@ -81,7 +80,8 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons
     else {
       // on to the next bit range
 #ifdef MI_HAVE_FAST_BITSCAN
-      const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1);
+      mi_assert_internal(mapm != 0);
+      const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
       mi_assert_internal(shift > 0 && shift <= count);
 #else
       const size_t shift = 1;
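The new shift is equivalent to the old one, assuming the usual definitions of `mi_bsr` (index of the highest set bit) and `mi_clz` (count of leading zeros) on a `MI_INTPTR_BITS`-wide word:

  //   mi_bsr(mapm) - bitidx + 1
  // = (MI_INTPTR_BITS - 1 - mi_clz(mapm)) - bitidx + 1
  // = MI_INTPTR_BITS - mi_clz(mapm) - bitidx
  // i.e. skip just past the highest blocking bit inside the probed mask.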
@ -100,7 +100,7 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
size_t idx = start_field_idx; size_t idx = start_field_idx;
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
if (idx >= bitmap_fields) idx = 0; // wrap if (idx >= bitmap_fields) { idx = 0; } // wrap
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
return true; return true;
} }
@ -127,14 +127,6 @@ bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap
return false; return false;
} }
/*
// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields.
bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) {
return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx);
}
*/
// Set `count` bits at `bitmap_idx` to 0 atomically // Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously. // Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
@ -143,7 +135,7 @@ bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count,
const size_t mask = mi_bitmap_mask_(count, bitidx); const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
// mi_assert_internal((bitmap[idx] & mask) == mask); // mi_assert_internal((bitmap[idx] & mask) == mask);
size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
return ((prev & mask) == mask); return ((prev & mask) == mask);
} }
@ -157,7 +149,7 @@ bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
//mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
if (any_zero != NULL) *any_zero = ((prev & mask) != mask); if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); }
return ((prev & mask) == 0); return ((prev & mask) == 0);
} }
@ -167,8 +159,8 @@ static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const size_t mask = mi_bitmap_mask_(count, bitidx); const size_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
size_t field = mi_atomic_load_relaxed(&bitmap[idx]); const size_t field = mi_atomic_load_relaxed(&bitmap[idx]);
if (any_ones != NULL) *any_ones = ((field & mask) != 0); if (any_ones != NULL) { *any_ones = ((field & mask) != 0); }
return ((field & mask) == mask); return ((field & mask) == mask);
} }
@@ -179,10 +171,13 @@ bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count
   const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
   const size_t mask = mi_bitmap_mask_(count, bitidx);
   mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields);
-  size_t expected = 0;
-  if (mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, mask)) return true;
-  if ((expected & mask) != 0) return false;
-  return mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask);
+  size_t expected = mi_atomic_load_relaxed(&bitmap[idx]);
+  do {
+    if ((expected & mask) != 0) return false;
+  }
+  while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask));
+  mi_assert_internal((expected & mask) == 0);
+  return true;
 }
@ -204,6 +199,7 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
// Try to atomically claim a sequence of `count` bits starting from the field // Try to atomically claim a sequence of `count` bits starting from the field
// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. // at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx) static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx)
{ {
mi_assert_internal(bitmap_idx != NULL); mi_assert_internal(bitmap_idx != NULL);
@ -214,9 +210,9 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
const size_t initial = mi_clz(map); // count of initial zeros starting at idx const size_t initial = mi_clz(map); // count of initial zeros starting at idx
mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
if (initial == 0) return false; if (initial == 0) return false;
if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us)
if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
// scan ahead // scan ahead
size_t found = initial; size_t found = initial;
size_t mask = 0; // mask bits for the final field size_t mask = 0; // mask bits for the final field
@ -224,25 +220,27 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
field++; field++;
map = mi_atomic_load_relaxed(field); map = mi_atomic_load_relaxed(field);
const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found)); const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found));
mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS);
mask = mi_bitmap_mask_(mask_bits, 0); mask = mi_bitmap_mask_(mask_bits, 0);
if ((map & mask) != 0) return false; if ((map & mask) != 0) return false; // some part is already claimed
found += mask_bits; found += mask_bits;
} }
mi_assert_internal(field < &bitmap[bitmap_fields]); mi_assert_internal(field < &bitmap[bitmap_fields]);
// found range of zeros up to the final field; mask contains mask in the final field // we found a range of contiguous zeros up to the final field; mask contains mask in the final field
// now claim it atomically // now try to claim the range atomically
mi_bitmap_field_t* const final_field = field; mi_bitmap_field_t* const final_field = field;
const size_t final_mask = mask; const size_t final_mask = mask;
mi_bitmap_field_t* const initial_field = &bitmap[idx]; mi_bitmap_field_t* const initial_field = &bitmap[idx];
const size_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial); const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial;
const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx);
// initial field // initial field
size_t newmap; size_t newmap;
field = initial_field; field = initial_field;
map = mi_atomic_load_relaxed(field); map = mi_atomic_load_relaxed(field);
do { do {
newmap = map | initial_mask; newmap = (map | initial_mask);
if ((map & initial_mask) != 0) { goto rollback; }; if ((map & initial_mask) != 0) { goto rollback; };
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
@ -257,31 +255,32 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
mi_assert_internal(field == final_field); mi_assert_internal(field == final_field);
map = mi_atomic_load_relaxed(field); map = mi_atomic_load_relaxed(field);
do { do {
newmap = map | final_mask; newmap = (map | final_mask);
if ((map & final_mask) != 0) { goto rollback; } if ((map & final_mask) != 0) { goto rollback; }
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
// claimed! // claimed!
*bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial); *bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
return true; return true;
rollback: rollback:
// roll back intermediate fields // roll back intermediate fields
// (we just failed to claim `field` so decrement first)
while (--field > initial_field) { while (--field > initial_field) {
newmap = 0; newmap = 0;
map = MI_BITMAP_FIELD_FULL; map = MI_BITMAP_FIELD_FULL;
mi_assert_internal(mi_atomic_load_relaxed(field) == map); mi_assert_internal(mi_atomic_load_relaxed(field) == map);
mi_atomic_store_release(field, newmap); mi_atomic_store_release(field, newmap);
} }
if (field == initial_field) { if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`)
map = mi_atomic_load_relaxed(field); map = mi_atomic_load_relaxed(field);
do { do {
mi_assert_internal((map & initial_mask) == initial_mask); mi_assert_internal((map & initial_mask) == initial_mask);
newmap = map & ~initial_mask; newmap = (map & ~initial_mask);
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
} }
// retry? (we make a recursive call instead of goto to be able to use const declarations) // retry? (we make a recursive call instead of goto to be able to use const declarations)
if (retries < 4) { if (retries <= 2) {
return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx); return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx);
} }
else { else {
@ -294,17 +293,22 @@ rollback:
// Starts at idx, and wraps around to search in all `bitmap_fields` fields. // Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
mi_assert_internal(count > 0); mi_assert_internal(count > 0);
if (count==1) return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); if (count <= 2) {
// we don't bother with crossover fields for small counts
return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx);
}
// visit the fields
size_t idx = start_field_idx; size_t idx = start_field_idx;
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
if (idx >= bitmap_fields) idx = 0; // wrap if (idx >= bitmap_fields) { idx = 0; } // wrap
// try to claim inside the field // first try to claim inside a field
if (count <= MI_BITMAP_FIELD_BITS) { if (count <= MI_BITMAP_FIELD_BITS) {
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
return true; return true;
} }
} }
// try to claim across fields // if that fails, then try to claim across fields
if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) { if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) {
return true; return true;
} }
@ -347,14 +351,14 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_one = true; bool all_one = true;
mi_bitmap_field_t* field = &bitmap[idx]; mi_bitmap_field_t* field = &bitmap[idx];
size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part
if ((prev & pre_mask) != pre_mask) all_one = false; if ((prev & pre_mask) != pre_mask) all_one = false;
while(mid_count-- > 0) { while(mid_count-- > 0) {
prev = mi_atomic_and_acq_rel(field++, ~mid_mask); prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part
if ((prev & mid_mask) != mid_mask) all_one = false; if ((prev & mid_mask) != mid_mask) all_one = false;
} }
if (post_mask!=0) { if (post_mask!=0) {
prev = mi_atomic_and_acq_rel(field, ~post_mask); prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part
if ((prev & post_mask) != post_mask) all_one = false; if ((prev & post_mask) != post_mask) all_one = false;
} }
return all_one; return all_one;
@ -384,7 +388,7 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co
if ((prev & post_mask) != 0) all_zero = false; if ((prev & post_mask) != 0) all_zero = false;
if ((prev & post_mask) != post_mask) any_zero = true; if ((prev & post_mask) != post_mask) any_zero = true;
} }
if (pany_zero != NULL) *pany_zero = any_zero; if (pany_zero != NULL) { *pany_zero = any_zero; }
return all_zero; return all_zero;
} }
@ -413,7 +417,7 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field
if ((prev & post_mask) != post_mask) all_ones = false; if ((prev & post_mask) != post_mask) all_ones = false;
if ((prev & post_mask) != 0) any_ones = true; if ((prev & post_mask) != 0) any_ones = true;
} }
if (pany_ones != NULL) *pany_ones = any_ones; if (pany_ones != NULL) { *pany_ones = any_ones; }
return all_ones; return all_ones;
} }


@ -203,6 +203,7 @@ mi_heap_t* _mi_heap_main_get(void) {
typedef struct mi_thread_data_s { typedef struct mi_thread_data_s {
mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
mi_tld_t tld; mi_tld_t tld;
mi_memid_t memid;
} mi_thread_data_t; } mi_thread_data_t;
@ -231,15 +232,20 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) {
// if that fails, allocate as meta data // if that fails, allocate as meta data
if (td == NULL) { if (td == NULL) {
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &is_zero, &_mi_stats_main); mi_memid_t memid;
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main);
if (td == NULL) { if (td == NULL) {
// if this fails, try once more. (issue #257) // if this fails, try once more. (issue #257)
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &is_zero, &_mi_stats_main); td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main);
if (td == NULL) { if (td == NULL) {
// really out of memory // really out of memory
_mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
} }
} }
if (td != NULL) {
td->memid = memid;
is_zero = memid.was_zero;
}
} }
if (td != NULL && !is_zero) { if (td != NULL && !is_zero) {
@ -260,7 +266,7 @@ static void mi_thread_data_free( mi_thread_data_t* tdfree ) {
} }
} }
// if that fails, just free it directly // if that fails, just free it directly
_mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main); _mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid, &_mi_stats_main);
} }
void _mi_thread_data_collect(void) { void _mi_thread_data_collect(void) {
@ -270,7 +276,7 @@ void _mi_thread_data_collect(void) {
if (td != NULL) { if (td != NULL) {
td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
if (td != NULL) { if (td != NULL) {
_mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main ); _mi_os_free(td, sizeof(mi_thread_data_t), td->memid, &_mi_stats_main);
} }
} }
} }

src/os.c

@ -137,7 +137,9 @@ void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
Free memory Free memory
-------------------------------------------------------------- */ -------------------------------------------------------------- */
static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) { static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats);
static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) {
MI_UNUSED(tld_stats); MI_UNUSED(tld_stats);
mi_assert_internal((size % _mi_os_page_size()) == 0); mi_assert_internal((size % _mi_os_page_size()) == 0);
if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr)
@ -146,18 +148,34 @@ static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats
_mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
} }
mi_stats_t* stats = &_mi_stats_main; mi_stats_t* stats = &_mi_stats_main;
if (was_committed) { _mi_stat_decrease(&stats->committed, size); } if (still_committed) { _mi_stat_decrease(&stats->committed, size); }
_mi_stat_decrease(&stats->reserved, size); _mi_stat_decrease(&stats->reserved, size);
} }
void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) {
void _mi_os_free_ex(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) { size_t csize = _mi_os_good_alloc_size(size);
const size_t csize = _mi_os_good_alloc_size(size); if (memid.memkind == MI_MEM_OS) {
mi_os_mem_free(addr,csize,was_committed,tld_stats); if (memid.mem.os.base != NULL) {
mi_assert(memid.mem.os.base <= addr);
csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base);
mi_os_prim_free(memid.mem.os.base, csize, still_committed, tld_stats);
}
else {
mi_os_prim_free(addr, csize, still_committed, tld_stats);
}
}
else if (memid.memkind == MI_MEM_OS_HUGE) {
mi_assert(memid.is_pinned);
mi_os_free_huge_os_pages(addr, size, tld_stats);
}
else {
// nothing to do
mi_assert(memid.memkind <= MI_MEM_EXTERNAL);
}
} }
void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) { void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) {
_mi_os_free_ex(p, size, true, tld_stats); _mi_os_free_ex(p, size, true, memid, tld_stats);
} }
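With this change `_mi_os_free_ex` dispatches on `memid.memkind`: plain `MI_MEM_OS` memory is released from `memid.mem.os.base` when that is set (so an over-allocated region is returned in full, not just from the aligned or offset pointer), `MI_MEM_OS_HUGE` memory goes through the per-page huge free, and externally provided memory is left untouched. A small caller-side sketch of the resulting round trip, assuming the signatures above:

// Sketch: the caller no longer threads is_zero/is_large flags around; the
// returned memid carries that state and is passed back verbatim on free.
#include "mimalloc/internal.h"   // assumed path of the internal header

static void example_os_roundtrip(void) {
  mi_memid_t memid;
  void* p = _mi_os_alloc(64 * 1024, &memid, &_mi_stats_main);
  if (p == NULL) return;
  if (memid.was_zero)  { /* freshly mapped: already zero, no memset needed */ }
  if (memid.is_pinned) { /* large/huge OS pages: cannot be purged or decommitted */ }
  _mi_os_free(p, 64 * 1024, memid, &_mi_stats_main);
}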
@@ -166,7 +184,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) {
-------------------------------------------------------------- */ -------------------------------------------------------------- */
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
mi_assert_internal(is_zero != NULL); mi_assert_internal(is_zero != NULL);
mi_assert_internal(is_large != NULL); mi_assert_internal(is_large != NULL);
@@ -191,7 +209,7 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo
// Primitive aligned allocation from the OS. // Primitive aligned allocation from the OS.
// This function guarantees the allocated memory is aligned. // This function guarantees the allocated memory is aligned.
static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) {
mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0));
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
mi_assert_internal(is_large != NULL); mi_assert_internal(is_large != NULL);
@@ -201,19 +219,19 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
size = _mi_align_up(size, _mi_os_page_size()); size = _mi_align_up(size, _mi_os_page_size());
// try first with a hint (this will be aligned directly on Win 10+ or BSD) // try first with a hint (this will be aligned directly on Win 10+ or BSD)
void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats); void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats);
if (p == NULL) return NULL; if (p == NULL) return NULL;
// if not aligned, free it, overallocate, and unmap around it // if not aligned, free it, overallocate, and unmap around it
if (((uintptr_t)p % alignment != 0)) { if (((uintptr_t)p % alignment != 0)) {
_mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
mi_os_mem_free(p, size, commit, stats); mi_os_prim_free(p, size, commit, stats);
if (size >= (SIZE_MAX - alignment)) return NULL; // overflow if (size >= (SIZE_MAX - alignment)) return NULL; // overflow
const size_t over_size = size + alignment; const size_t over_size = size + alignment;
if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocated block if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocated block
// over-allocate uncommitted (virtual) memory // over-allocate uncommitted (virtual) memory
p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats); p = mi_os_prim_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats);
if (p == NULL) return NULL; if (p == NULL) return NULL;
// set p to the aligned part in the full region // set p to the aligned part in the full region
@@ -228,7 +246,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
} }
else { // mmap can free inside an allocation else { // mmap can free inside an allocation
// overallocate... // overallocate...
p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, is_zero, stats); p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
if (p == NULL) return NULL; if (p == NULL) return NULL;
// and selectively unmap parts around the over-allocated area. (noop on sbrk) // and selectively unmap parts around the over-allocated area. (noop on sbrk)
void* aligned_p = mi_align_up_ptr(p, alignment); void* aligned_p = mi_align_up_ptr(p, alignment);
@@ -236,8 +254,8 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
size_t mid_size = _mi_align_up(size, _mi_os_page_size()); size_t mid_size = _mi_align_up(size, _mi_os_page_size());
size_t post_size = over_size - pre_size - mid_size; size_t post_size = over_size - pre_size - mid_size;
mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size); mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size);
if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); if (pre_size > 0) mi_os_prim_free(p, pre_size, commit, stats);
if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); if (post_size > 0) mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats);
// we can return the aligned pointer on `mmap` (and sbrk) systems // we can return the aligned pointer on `mmap` (and sbrk) systems
p = aligned_p; p = aligned_p;
} }
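The fallback path above over-allocates by `alignment` and then, on mmap-like systems, unmaps the unaligned head and the tail beyond the aligned window. A standalone sketch of that arithmetic with hypothetical names (not mimalloc code); it assumes `alignment` is a power of two and `size` is already page-aligned, as guaranteed earlier in the function:

// Standalone sketch of the head/tail trim computed above (hypothetical helper).
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static void split_overallocation(void* p, size_t over_size, size_t size, size_t alignment,
                                 void** aligned_p, size_t* pre_size, size_t* post_size) {
  uintptr_t addr    = (uintptr_t)p;
  uintptr_t aligned = (addr + alignment - 1) & ~(uintptr_t)(alignment - 1); // round up
  *aligned_p = (void*)aligned;
  *pre_size  = (size_t)(aligned - addr);        // unaligned head, unmapped if > 0
  *post_size = over_size - *pre_size - size;    // tail past the aligned window
  assert(*pre_size < alignment && *pre_size + size + *post_size == over_size);
}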
@@ -252,31 +270,38 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
OS API: alloc and alloc_aligned OS API: alloc and alloc_aligned
----------------------------------------------------------- */ ----------------------------------------------------------- */
void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* tld_stats) { void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) {
MI_UNUSED(tld_stats); MI_UNUSED(tld_stats);
*memid = _mi_memid_none();
mi_stats_t* stats = &_mi_stats_main; mi_stats_t* stats = &_mi_stats_main;
if (size == 0) return NULL; if (size == 0) return NULL;
size = _mi_os_good_alloc_size(size); size = _mi_os_good_alloc_size(size);
bool is_large = false; bool os_is_large = false;
bool is_zerox = false; bool os_is_zero = false;
void* p = mi_os_mem_alloc(size, 0, true, false, &is_large, &is_zerox, stats); void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats);
if (is_zero != NULL) { *is_zero = is_zerox; } if (p != NULL) {
*memid = _mi_memid_create_os(true, os_is_zero);
memid->is_pinned = os_is_large;
}
return p; return p;
} }
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats)
{ {
MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings
MI_UNUSED(tld_stats); MI_UNUSED(tld_stats);
*memid = _mi_memid_none();
if (size == 0) return NULL; if (size == 0) return NULL;
size = _mi_os_good_alloc_size(size); size = _mi_os_good_alloc_size(size);
alignment = _mi_align_up(alignment, _mi_os_page_size()); alignment = _mi_align_up(alignment, _mi_os_page_size());
bool os_is_large = false; bool os_is_large = false;
bool os_is_zero = false; bool os_is_zero = false;
void* p = mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &_mi_stats_main /*tld->stats*/ ); void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &_mi_stats_main /*tld->stats*/ );
if (is_large != NULL) { *is_large = os_is_large; } if (p != NULL) {
if (is_zero != NULL) { *is_zero = os_is_zero; } *memid = _mi_memid_create_os(commit, os_is_zero);
memid->is_pinned = os_is_large;
}
return p; return p;
} }
@@ -288,22 +313,24 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
to use the actual start of the memory region. to use the actual start of the memory region.
----------------------------------------------------------- */ ----------------------------------------------------------- */
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) { void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) {
mi_assert(offset <= MI_SEGMENT_SIZE); mi_assert(offset <= MI_SEGMENT_SIZE);
mi_assert(offset <= size); mi_assert(offset <= size);
mi_assert((alignment % _mi_os_page_size()) == 0); mi_assert((alignment % _mi_os_page_size()) == 0);
*memid = _mi_memid_none();
if (offset > MI_SEGMENT_SIZE) return NULL; if (offset > MI_SEGMENT_SIZE) return NULL;
if (offset == 0) { if (offset == 0) {
// regular aligned allocation // regular aligned allocation
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, is_large, is_zero, tld_stats); return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld_stats);
} }
else { else {
// overallocate to align at an offset // overallocate to align at an offset
const size_t extra = _mi_align_up(offset, alignment) - offset; const size_t extra = _mi_align_up(offset, alignment) - offset;
const size_t oversize = size + extra; const size_t oversize = size + extra;
void* start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, is_large, is_zero, tld_stats); void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats);
if (start == NULL) return NULL; if (start == NULL) return NULL;
void* p = (uint8_t*)start + extra; memid->mem.os.base = start;
void* const p = (uint8_t*)start + extra;
mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
// decommit the overallocation at the start // decommit the overallocation at the start
if (commit && extra > _mi_os_page_size()) { if (commit && extra > _mi_os_page_size()) {
@@ -313,14 +340,6 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offse
} }
} }
void _mi_os_free_aligned_at_offset(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) {
mi_assert(align_offset <= MI_SEGMENT_SIZE);
const size_t extra = _mi_align_up(align_offset, alignment) - align_offset;
void* start = (uint8_t*)p - extra;
_mi_os_free_ex(start, size + extra, was_committed, tld_stats);
}
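Because `_mi_os_alloc_aligned_at_offset` now records the real start of the over-allocation in `memid->mem.os.base`, the dedicated free routine above could be dropped: the `MI_MEM_OS` branch of `_mi_os_free_ex` recomputes the full region from the memid. A minimal sketch of the new pairing, assuming the signatures in this commit:

// Sketch: offset-aligned allocation and its symmetric free via the memid.
#include "mimalloc/internal.h"   // assumed path of the internal header

static void example_offset_aligned(size_t size, size_t alignment, size_t offset) {
  mi_memid_t memid;
  void* p = _mi_os_alloc_aligned_at_offset(size, alignment, offset,
                                           true  /* commit */,
                                           false /* allow_large */,
                                           &memid, &_mi_stats_main);
  if (p == NULL) return;
  // (p + offset) is aligned; memid.mem.os.base points at the real start when
  // extra head bytes were allocated.
  _mi_os_free(p, size, memid, &_mi_stats_main);  // frees from the base, not from p
}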
/* ----------------------------------------------------------- /* -----------------------------------------------------------
OS memory API: reset, commit, decommit, protect, unprotect. OS memory API: reset, commit, decommit, protect, unprotect.
----------------------------------------------------------- */ ----------------------------------------------------------- */
@@ -526,7 +545,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
#endif #endif
// Allocate MI_SEGMENT_SIZE aligned huge pages // Allocate MI_SEGMENT_SIZE aligned huge pages
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, bool* is_zero) { void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) {
*memid = _mi_memid_none();
if (psize != NULL) *psize = 0; if (psize != NULL) *psize = 0;
if (pages_reserved != NULL) *pages_reserved = 0; if (pages_reserved != NULL) *pages_reserved = 0;
size_t size = 0; size_t size = 0;
@@ -541,11 +561,11 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
bool all_zero = true; bool all_zero = true;
while (page < pages) { while (page < pages) {
// allocate a page // allocate a page
bool is_zerox = false; bool is_zero = false;
void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
void* p = NULL; void* p = NULL;
int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zerox, &p); int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p);
if (!is_zerox) { all_zero = false; } if (!is_zero) { all_zero = false; }
if (err != 0) { if (err != 0) {
_mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE); _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE);
break; break;
@@ -556,7 +576,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
// no success, issue a warning and break // no success, issue a warning and break
if (p != NULL) { if (p != NULL) {
_mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr); _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr);
_mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main);
} }
break; break;
} }
@@ -584,17 +604,22 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
if (pages_reserved != NULL) { *pages_reserved = page; } if (pages_reserved != NULL) { *pages_reserved = page; }
if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; }
if (is_zero != NULL) { *is_zero = all_zero; } if (page != 0) {
mi_assert(start != NULL);
*memid = _mi_memid_create_os(true, all_zero);
memid->memkind = MI_MEM_OS_HUGE;
memid->is_pinned = true;
}
return (page == 0 ? NULL : start); return (page == 0 ? NULL : start);
} }
// free every huge page in a range individually (as we allocated per page) // free every huge page in a range individually (as we allocated per page)
// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems.
void _mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) { static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) {
if (p==NULL || size==0) return; if (p==NULL || size==0) return;
uint8_t* base = (uint8_t*)p; uint8_t* base = (uint8_t*)p;
while (size >= MI_HUGE_OS_PAGE_SIZE) { while (size >= MI_HUGE_OS_PAGE_SIZE) {
_mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats);
size -= MI_HUGE_OS_PAGE_SIZE; size -= MI_HUGE_OS_PAGE_SIZE;
base += MI_HUGE_OS_PAGE_SIZE; base += MI_HUGE_OS_PAGE_SIZE;
} }
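Huge OS pages are claimed one page at a time at consecutive addresses, so they are also released page by page. The returned memid is tagged `MI_MEM_OS_HUGE` and pinned, which is what routes a later `_mi_os_free` through `mi_os_free_huge_os_pages`. A small reserve/release sketch, assuming the signatures above:

// Sketch: reserving huge OS pages and releasing them through the memid.
#include "mimalloc/internal.h"   // assumed path of the internal header

static void example_huge_pages(size_t pages) {
  mi_memid_t memid;
  size_t pages_reserved = 0;
  size_t psize = 0;
  void* p = _mi_os_alloc_huge_os_pages(pages, -1 /* no specific numa node */, 2000 /* max msecs */,
                                       &pages_reserved, &psize, &memid);
  if (p == NULL) return;
  // memid.memkind == MI_MEM_OS_HUGE and memid.is_pinned are set here.
  _mi_os_free(p, psize, memid, &_mi_stats_main);  // frees each huge page individually
}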

View file

@@ -641,11 +641,6 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
// enable the new free list // enable the new free list
page->capacity += (uint16_t)extend; page->capacity += (uint16_t)extend;
mi_stat_increase(tld->stats.page_committed, extend * bsize); mi_stat_increase(tld->stats.page_committed, extend * bsize);
// extension into zero initialized memory preserves the zero'd free list
if (!page->is_zero_init) {
page->is_zero = false;
}
mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_expensive(mi_page_is_valid_init(page));
} }
@@ -671,10 +666,11 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
page->keys[0] = _mi_heap_random_next(heap); page->keys[0] = _mi_heap_random_next(heap);
page->keys[1] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap);
#endif #endif
#if MI_DEBUG > 0
page->is_zero = false; // ensure in debug mode we initialize with MI_DEBUG_UNINIT, see issue #501
#else
page->is_zero = page->is_zero_init; page->is_zero = page->is_zero_init;
#if MI_DEBUG>1
if (page->is_zero_init) {
mi_assert(mi_mem_is_zero(page_start, page_size));
}
#endif #endif
mi_assert_internal(page->is_committed); mi_assert_internal(page->is_committed);
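Instead of pessimistically clearing `page->is_zero` in debug builds, the page now trusts `is_zero_init` and, at `MI_DEBUG>1`, verifies the claim by checking that the page memory really is all zero. A standalone sketch of such a check (hypothetical helper; mimalloc uses its own `mi_mem_is_zero`):

// Standalone sketch of a "memory is all zero" check like the debug assert above.
#include <stdbool.h>
#include <stddef.h>

static bool example_mem_is_zero(const void* p, size_t size) {
  const unsigned char* u = (const unsigned char*)p;
  for (size_t i = 0; i < size; i++) {
    if (u[i] != 0) return false;
  }
  return true;
}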

View file

@@ -437,7 +437,7 @@ int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bo
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
*is_zero = true; *is_zero = false;
*addr = NULL; *addr = NULL;
return ENOMEM; return ENOMEM;
} }

View file

@@ -125,7 +125,7 @@ static mi_segment_t* _mi_segment_of(const void* p) {
// Is this a valid pointer in our heap? // Is this a valid pointer in our heap?
static bool mi_is_valid_pointer(const void* p) { static bool mi_is_valid_pointer(const void* p) {
return (_mi_segment_of(p) != NULL); return ((_mi_segment_of(p) != NULL) || (_mi_arena_contains(p)));
} }
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
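With arena-backed memory (for example reserved huge pages) now recognized as well, `mi_is_in_heap_region` covers every region mimalloc manages. A small public-API usage sketch; the values in the comments are the expected results, not output from a real run:

// Sketch: public-API pointer check, only <mimalloc.h> is assumed.
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  void* p = mi_malloc(64);
  int on_stack = 0;
  printf("heap pointer : %d\n", (int)mi_is_in_heap_region(p));         // expected 1
  printf("stack pointer: %d\n", (int)mi_is_in_heap_region(&on_stack)); // expected 0
  mi_free(p);
  return 0;
}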

View file

@@ -998,7 +998,7 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld
// zero the page data, but not the segment fields // zero the page data, but not the segment fields
page->is_zero_init = false; page->is_zero_init = false;
ptrdiff_t ofs = offsetof(mi_page_t, capacity); ptrdiff_t ofs = offsetof(mi_page_t, capacity);
memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); _mi_memzero((uint8_t*)page + ofs, sizeof(*page) - ofs);
page->xblock_size = 1; page->xblock_size = 1;
// and free it // and free it

View file

@@ -20,6 +20,7 @@ terms of the MIT license.
#include <stdint.h> #include <stdint.h>
#include <stdbool.h> #include <stdbool.h>
#include <string.h> #include <string.h>
#include <assert.h>
// > mimalloc-test-stress [THREADS] [SCALE] [ITER] // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
// //
@@ -38,18 +39,18 @@ static int ITER = 50; // N full iterations destructing and re-creating a
#define STRESS // undefine for leak test #define STRESS // undefine for leak test
static bool allow_large_objects = true; // allow very large objects? static bool allow_large_objects = false; // allow very large objects? (set to `true` if SCALE>100)
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
// #define USE_STD_MALLOC // #define USE_STD_MALLOC
#ifdef USE_STD_MALLOC #ifdef USE_STD_MALLOC
#define custom_calloc(n,s) malloc(n*s) #define custom_calloc(n,s) calloc(n,s)
#define custom_realloc(p,s) realloc(p,s) #define custom_realloc(p,s) realloc(p,s)
#define custom_free(p) free(p) #define custom_free(p) free(p)
#else #else
#include <mimalloc.h> #include <mimalloc.h>
#define custom_calloc(n,s) mi_malloc(n*s) #define custom_calloc(n,s) mi_calloc(n,s)
#define custom_realloc(p,s) mi_realloc(p,s) #define custom_realloc(p,s) mi_realloc(p,s)
#define custom_free(p) mi_free(p) #define custom_free(p) mi_free(p)
#endif #endif
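The switch from `malloc(n*s)` to `calloc(n,s)` matters for this test: the new `assert(p[i] == 0)` in `alloc_items` relies on zero-initialized memory, and `calloc` (like `mi_calloc`) also fails cleanly when `n*s` would overflow rather than silently allocating a wrapped, too-small block. A standalone illustration of that second point, hedged as the behavior of good implementations (including mimalloc's):

// Standalone sketch: calloc(n,s) vs malloc(n*s) when n*s overflows size_t.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  size_t n = (SIZE_MAX / 2) + 2;      // n * 2 wraps around to 2
  void* a = calloc(n, 2);             // good implementations detect the overflow -> NULL
  void* b = malloc(n * 2);            // silently allocates only 2 bytes
  printf("calloc: %p, malloc: %p\n", a, b);
  free(a); free(b);
  return 0;
}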
@@ -106,6 +107,7 @@ static void* alloc_items(size_t items, random_t r) {
uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t)); uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t));
if (p != NULL) { if (p != NULL) {
for (uintptr_t i = 0; i < items; i++) { for (uintptr_t i = 0; i < items; i++) {
assert(p[i] == 0);
p[i] = (items - i) ^ cookie; p[i] = (items - i) ^ cookie;
} }
} }
@@ -247,7 +249,10 @@ int main(int argc, char** argv) {
long n = (strtol(argv[3], &end, 10)); long n = (strtol(argv[3], &end, 10));
if (n > 0) ITER = n; if (n > 0) ITER = n;
} }
printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); if (SCALE > 100) {
allow_large_objects = true;
}
printf("Using %d threads with a %d%% load-per-thread and %d iterations %s\n", THREADS, SCALE, ITER, (allow_large_objects ? "(allow large objects)" : ""));
//mi_reserve_os_memory(1024*1024*1024ULL, false, true); //mi_reserve_os_memory(1024*1024*1024ULL, false, true);
//int res = mi_reserve_huge_os_pages(4,1); //int res = mi_reserve_huge_os_pages(4,1);
//printf("(reserve huge: %i\n)", res); //printf("(reserve huge: %i\n)", res);