From 10a6b303bebc53702de53fb36f5c786522d71450 Mon Sep 17 00:00:00 2001
From: daanx
Date: Tue, 18 Apr 2023 13:34:23 -0700
Subject: [PATCH 1/4] review bitmap.c

---
 src/bitmap.c | 89 +++++++++++++++++++++++++++-------------------------
 1 file changed, 47 insertions(+), 42 deletions(-)

diff --git a/src/bitmap.c b/src/bitmap.c
index bdd1c869..98b53052 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -11,7 +11,6 @@ represented as an array of fields where each field is a machine word (`size_t`)
 There are two APIs; the standard one cannot have sequences that cross
 between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
-(this is used in region allocation)
 
 The `_across` postfixed functions do allow sequences that can cross over
 between the fields. (This is used in arena allocation)
@@ -63,12 +62,12 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_
   // scan linearly for a free range of zero bits
   while (bitidx <= bitidx_max) {
-    const size_t mapm = map & m;
+    const size_t mapm = (map & m);
     if (mapm == 0) {  // are the mask bits free at bitidx?
       mi_assert_internal((m >> bitidx) == mask); // no overflow?
-      const size_t newmap = map | m;
+      const size_t newmap = (map | m);
       mi_assert_internal((newmap^map) >> bitidx == mask);
-      if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) {  // TODO: use strong cas here?
+      if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) {  // TODO: use weak cas here?
         // no success, another thread claimed concurrently.. keep going (with updated `map`)
         continue;
       }
@@ -81,7 +80,8 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_
     else {
       // on to the next bit range
       #ifdef MI_HAVE_FAST_BITSCAN
-      const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1);
+      mi_assert_internal(mapm != 0);
+      const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx));
       mi_assert_internal(shift > 0 && shift <= count);
       #else
       const size_t shift = 1;
@@ -100,7 +100,7 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_
 bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
   size_t idx = start_field_idx;
   for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
-    if (idx >= bitmap_fields) idx = 0; // wrap
+    if (idx >= bitmap_fields) { idx = 0; } // wrap
     if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
       return true;
     }
@@ -108,13 +108,6 @@ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fiel
   return false;
 }
 
-/*
-// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success.
-// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields.
-bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) {
-  return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx);
-}
-*/
 
 // Set `count` bits at `bitmap_idx` to 0 atomically
 // Returns `true` if all `count` bits were 1 previously.
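As a minimal, self-contained illustration of the scan-and-CAS pattern reviewed in the hunks above (not mimalloc code: the helper name is hypothetical, the scan advances one bit at a time instead of using the `MI_HAVE_FAST_BITSCAN` skip, and plain C11 atomics stand in for the `mi_atomic_*` wrappers):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

// Try to claim `count` contiguous zero bits in a single bitmap field.
static bool try_claim_in_field(_Atomic(size_t)* field, size_t count, size_t* bitidx_out) {
  const size_t bits = sizeof(size_t) * 8;
  if (count == 0 || count > bits) return false;
  const size_t mask = (count == bits ? (size_t)-1 : (((size_t)1 << count) - 1));
  size_t map = atomic_load_explicit(field, memory_order_relaxed);
  size_t bitidx = 0;
  while (bitidx + count <= bits) {
    const size_t m = (mask << bitidx);
    if ((map & m) == 0) {                       // are the mask bits free at bitidx?
      const size_t newmap = (map | m);
      // strong CAS, as in the patch above: a spurious weak-CAS failure would
      // force a needless re-scan even though the bits were actually free
      if (atomic_compare_exchange_strong(field, &map, newmap)) {
        *bitidx_out = bitidx;
        return true;
      }
      continue;  // CAS failed: `map` now holds the fresh value; retry in place
    }
    bitidx++;    // next candidate position (mimalloc skips ahead via bit-scan)
  }
  return false;
}

int main(void) {
  _Atomic(size_t) field = 0x0F;  // low 4 bits already claimed
  size_t bitidx;
  if (try_claim_in_field(&field, 3, &bitidx)) {
    printf("claimed 3 bits at index %zu\n", bitidx);  // expect index 4
  }
  return 0;
}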
@@ -124,7 +117,7 @@ bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); // mi_assert_internal((bitmap[idx] & mask) == mask); - size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); + const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); return ((prev & mask) == mask); } @@ -138,7 +131,7 @@ bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); - if (any_zero != NULL) *any_zero = ((prev & mask) != mask); + if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); } return ((prev & mask) == 0); } @@ -148,8 +141,8 @@ static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); - size_t field = mi_atomic_load_relaxed(&bitmap[idx]); - if (any_ones != NULL) *any_ones = ((field & mask) != 0); + const size_t field = mi_atomic_load_relaxed(&bitmap[idx]); + if (any_ones != NULL) { *any_ones = ((field & mask) != 0); } return ((field & mask) == mask); } @@ -160,10 +153,13 @@ bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); - size_t expected = 0; - if (mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, mask)) return true; - if ((expected & mask) != 0) return false; - return mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask); + size_t expected = mi_atomic_load_relaxed(&bitmap[idx]); + do { + if ((expected & mask) != 0) return false; + } + while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask)); + mi_assert_internal((expected & mask) == 0); + return true; } @@ -185,6 +181,7 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // Try to atomically claim a sequence of `count` bits starting from the field // at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. 
+// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`) static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); @@ -195,9 +192,9 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit const size_t initial = mi_clz(map); // count of initial zeros starting at idx mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); if (initial == 0) return false; - if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields + if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us) if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries - + // scan ahead size_t found = initial; size_t mask = 0; // mask bits for the final field @@ -205,25 +202,27 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit field++; map = mi_atomic_load_relaxed(field); const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found)); + mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS); mask = mi_bitmap_mask_(mask_bits, 0); - if ((map & mask) != 0) return false; + if ((map & mask) != 0) return false; // some part is already claimed found += mask_bits; } mi_assert_internal(field < &bitmap[bitmap_fields]); - // found range of zeros up to the final field; mask contains mask in the final field - // now claim it atomically + // we found a range of contiguous zeros up to the final field; mask contains mask in the final field + // now try to claim the range atomically mi_bitmap_field_t* const final_field = field; const size_t final_mask = mask; mi_bitmap_field_t* const initial_field = &bitmap[idx]; - const size_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial); + const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial; + const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx); // initial field size_t newmap; field = initial_field; map = mi_atomic_load_relaxed(field); do { - newmap = map | initial_mask; + newmap = (map | initial_mask); if ((map & initial_mask) != 0) { goto rollback; }; } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); @@ -238,31 +237,32 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit mi_assert_internal(field == final_field); map = mi_atomic_load_relaxed(field); do { - newmap = map | final_mask; + newmap = (map | final_mask); if ((map & final_mask) != 0) { goto rollback; } } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); // claimed! 
- *bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial); + *bitmap_idx = mi_bitmap_index_create(idx, initial_idx); return true; rollback: // roll back intermediate fields + // (we just failed to claim `field` so decrement first) while (--field > initial_field) { newmap = 0; map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); mi_assert_internal(mi_atomic_load_relaxed(field) == map); mi_atomic_store_release(field, newmap); } - if (field == initial_field) { + if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`) map = mi_atomic_load_relaxed(field); do { mi_assert_internal((map & initial_mask) == initial_mask); - newmap = map & ~initial_mask; + newmap = (map & ~initial_mask); } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); } // retry? (we make a recursive call instead of goto to be able to use const declarations) - if (retries < 4) { + if (retries <= 2) { return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx); } else { @@ -275,17 +275,22 @@ rollback: // Starts at idx, and wraps around to search in all `bitmap_fields` fields. bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(count > 0); - if (count==1) return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + if (count <= 2) { + // we don't bother with crossover fields for small counts + return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + } + + // visit the fields size_t idx = start_field_idx; for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) idx = 0; // wrap - // try to claim inside the field + if (idx >= bitmap_fields) { idx = 0; } // wrap + // first try to claim inside a field if (count <= MI_BITMAP_FIELD_BITS) { if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } } - // try to claim across fields + // if that fails, then try to claim across fields if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) { return true; } @@ -328,14 +333,14 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_one = true; mi_bitmap_field_t* field = &bitmap[idx]; - size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); + size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part if ((prev & pre_mask) != pre_mask) all_one = false; while(mid_count-- > 0) { - prev = mi_atomic_and_acq_rel(field++, ~mid_mask); + prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part if ((prev & mid_mask) != mid_mask) all_one = false; } if (post_mask!=0) { - prev = mi_atomic_and_acq_rel(field, ~post_mask); + prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part if ((prev & post_mask) != post_mask) all_one = false; } return all_one; @@ -365,7 +370,7 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co if ((prev & post_mask) != 0) all_zero = false; if ((prev & post_mask) != post_mask) any_zero = true; } - if (pany_zero != NULL) *pany_zero = any_zero; + if (pany_zero != NULL) { *pany_zero = any_zero; } return all_zero; } @@ -394,7 +399,7 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, 
size_t bitmap_field if ((prev & post_mask) != post_mask) all_ones = false; if ((prev & post_mask) != 0) any_ones = true; } - if (pany_ones != NULL) *pany_ones = any_ones; + if (pany_ones != NULL) { *pany_ones = any_ones; } return all_ones; } From 2f878354f401baf6ac4c4d6c5a0bb53d3356590b Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 18 Apr 2023 14:13:55 -0700 Subject: [PATCH 2/4] add arena contains check for valid pointers --- src/bitmap.c | 2 +- src/segment-map.c | 2 +- test/test-stress.c | 9 ++++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/bitmap.c b/src/bitmap.c index 98b53052..01064140 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2021 Microsoft Research, Daan Leijen +Copyright (c) 2019-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. diff --git a/src/segment-map.c b/src/segment-map.c index 56b18531..4c2104bd 100644 --- a/src/segment-map.c +++ b/src/segment-map.c @@ -125,7 +125,7 @@ static mi_segment_t* _mi_segment_of(const void* p) { // Is this a valid pointer in our heap? static bool mi_is_valid_pointer(const void* p) { - return (_mi_segment_of(p) != NULL); + return ((_mi_segment_of(p) != NULL) || (_mi_arena_contains(p))); } mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { diff --git a/test/test-stress.c b/test/test-stress.c index c0de8910..0d6d0bc9 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -38,8 +38,8 @@ static int ITER = 50; // N full iterations destructing and re-creating a #define STRESS // undefine for leak test -static bool allow_large_objects = true; // allow very large objects? -static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? +static bool allow_large_objects = false; // allow very large objects? (set to `true` if SCALE>100) +static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? // #define USE_STD_MALLOC @@ -244,7 +244,10 @@ int main(int argc, char** argv) { long n = (strtol(argv[3], &end, 10)); if (n > 0) ITER = n; } - printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); + if (SCALE > 100) { + allow_large_objects = true; + } + printf("Using %d threads with a %d%% load-per-thread and %d iterations %s\n", THREADS, SCALE, ITER, (allow_large_objects ? 
"(allow large objects)" : "")); //mi_reserve_os_memory(1024*1024*1024ULL, false, true); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); From b845be241aec2cb3e3dfd2e9fb45bbb5233ea459 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 18 Apr 2023 16:38:47 -0700 Subject: [PATCH 3/4] wip: use memid for OS allocation --- include/mimalloc/internal.h | 48 ++++++++--- include/mimalloc/types.h | 9 +- src/arena.c | 168 ++++++++++++------------------------ src/init.c | 14 ++- src/os.c | 115 ++++++++++++++---------- 5 files changed, 178 insertions(+), 176 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e0bcfaea..b320f690 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -88,9 +88,9 @@ void _mi_thread_data_collect(void); // os.c void _mi_os_init(void); // called from process init -void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* stats); -void _mi_os_free(void* p, size_t size, mi_stats_t* stats); -void _mi_os_free_ex(void* p, size_t size, bool is_committed, mi_stats_t* stats); +void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); +void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats); +void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats); size_t _mi_os_page_size(void); size_t _mi_os_good_alloc_size(size_t size); @@ -105,16 +105,14 @@ bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats); -void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats); -void _mi_os_free_aligned_at_offset(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats); +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats); void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); bool _mi_os_use_large_page(size_t size, size_t alignment); size_t _mi_os_large_page_size(void); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, bool* is_zero); -void _mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); // arena.c mi_arena_id_t _mi_arena_id_none(void); @@ -268,6 +266,10 @@ bool _mi_page_is_valid(mi_page_t* page); #define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) +#include +// initialize a local variable to zero; use memset as compilers optimize constant sized memset's +#define _mi_memzero_var(x) memset(&x,0,sizeof(x)) + // Is `x` a power of two? 
(0 is considered a power of two)
 static inline bool _mi_is_power_of_two(uintptr_t x) {
   return ((x & (x - 1)) == 0);
 }
@@ -653,6 +655,30 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c
   #endif
 }
 
+
+/* -----------------------------------------------------------
+  memory id's
+----------------------------------------------------------- */
+
+static inline mi_memid_t _mi_memid_create(mi_memkind_t memkind) {
+  mi_memid_t memid;
+  _mi_memzero_var(memid);
+  memid.memkind = memkind;
+  return memid;
+}
+
+static inline mi_memid_t _mi_memid_none(void) {
+  return _mi_memid_create(MI_MEM_NONE);
+}
+
+static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero) {
+  mi_memid_t memid = _mi_memid_create(MI_MEM_OS);
+  memid.was_committed = committed;
+  memid.was_zero = is_zero;
+  return memid;
+}
+
+
 // -------------------------------------------------------------------
 // Fast "random" shuffle
 // -------------------------------------------------------------------
@@ -812,7 +838,6 @@ static inline size_t mi_bsr(uintptr_t x) {
 
 #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
 #include <intrin.h>
-#include <string.h>
 extern bool _mi_cpu_has_fsrm;
 static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
   if (_mi_cpu_has_fsrm) {
@@ -831,7 +856,6 @@ static inline void _mi_memzero(void* dst, size_t n) {
   }
 }
 #else
-#include <string.h>
 static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
   memcpy(dst, src, n);
 }
@@ -840,9 +864,6 @@ static inline void _mi_memzero(void* dst, size_t n) {
 }
 #endif
 
-// initialize a local variable to zero; use memset as compilers optimize constant sized memset's
-#define _mi_memzero_var(x) memset(&x,0,sizeof(x))
-
 // -------------------------------------------------------------------------------
 // The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
 // This is used for example in `mi_realloc`.
@@ -850,7 +871,6 @@ static inline void _mi_memzero(void* dst, size_t n) {
 
 #if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__)
 // On GCC/CLang we provide a hint that the pointers are word aligned.
-#include <string.h>
 static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
   mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
   void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index 85d7e813..60408ed3 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -330,12 +330,17 @@ typedef enum mi_memkind_e {
   MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
   MI_MEM_STATIC,   // allocated in a static area and should not be freed (for arena meta data for example)
   MI_MEM_OS,       // allocated from the OS
+  MI_MEM_OS_HUGE,  // allocated as huge os pages
+  MI_MEM_OS_REMAP, // allocated in a remappable area (i.e.
using `mremap`) MI_MEM_ARENA // allocated from an arena (the usual case) } mi_memkind_t; +static inline bool mi_memkind_is_os(mi_memkind_t memkind) { + return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP); +} + typedef struct mi_memid_os_info { - size_t alignment; // allocated with the given alignment - size_t align_offset; // the offset that was aligned (used only for huge aligned pages) + void* base; // actual base address of the block (used for offset aligned allocations) } mi_memid_os_info_t; typedef struct mi_memid_arena_info { diff --git a/src/arena.c b/src/arena.c index d3ffe3b7..04d5c49c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -41,18 +41,15 @@ typedef uintptr_t mi_block_info_t; // A memory arena descriptor typedef struct mi_arena_s { mi_arena_id_t id; // arena id; 0 for non-specific - bool exclusive; // only allow allocations if specifically for this arena - bool owned; // if true, the arena will be released when the process exits if `mi_option_destroy_on_exit` is set. + mi_memid_t memid; // memid of the memory area _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) size_t meta_size; // size of the arena structure itself (including its bitmaps) mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) int numa_node; // associated NUMA node - bool is_zero_init; // is the arena zero initialized? - bool is_large; // large- or huge OS pages (always committed) - bool is_huge_alloc; // huge OS pages allocated by `_mi_os_alloc_huge_pages` - bool allow_decommit; // is decommit allowed? if true, is_large should be false and blocks_committed != NULL + bool exclusive; // only allow allocations if specifically for this arena + bool is_large; // memory area consists of large- or huge OS pages (always committed) _Atomic(size_t) search_idx; // optimization to start the search for free blocks _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? 
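The `mi_memkind_is_os` helper added above works only because the enum places `MI_MEM_OS_HUGE` and `MI_MEM_OS_REMAP` directly after `MI_MEM_OS`. A reduced sketch of that ordering invariant (stand-in names, not the mimalloc definitions):

#include <assert.h>
#include <stdbool.h>

typedef enum memkind_e {
  MEM_NONE, MEM_EXTERNAL, MEM_STATIC,
  MEM_OS, MEM_OS_HUGE, MEM_OS_REMAP,  // the OS-backed kinds must stay contiguous
  MEM_ARENA
} memkind_t;

// One range comparison classifies all three OS-backed kinds.
static bool memkind_is_os(memkind_t k) {
  return (k >= MEM_OS && k <= MEM_OS_REMAP);
}

int main(void) {
  assert(memkind_is_os(MEM_OS_HUGE));
  assert(!memkind_is_os(MEM_ARENA));
  return 0;
}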
@@ -67,7 +64,7 @@ static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 -static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, bool is_large, bool is_huge_alloc, bool is_zero, int numa_node, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept; +//static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept; /* ----------------------------------------------------------- Arena id's @@ -92,27 +89,6 @@ static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclus (arena_id == req_arena_id)); } -/* ----------------------------------------------------------- - memory id's ------------------------------------------------------------ */ - -static mi_memid_t mi_memid_create(mi_memkind_t memkind) { - mi_memid_t memid; - _mi_memzero_var(memid); - memid.memkind = memkind; - return memid; -} - -static mi_memid_t mi_memid_none(void) { - return mi_memid_create(MI_MEM_NONE); -} - -static mi_memid_t mi_memid_create_os(bool committed) { - mi_memid_t memid = mi_memid_create(MI_MEM_OS); - memid.was_committed = committed; - return memid; -} - bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { if (memid.memkind == MI_MEM_ARENA) { return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id); @@ -123,7 +99,6 @@ bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_i } - /* ----------------------------------------------------------- Arena allocations get a (currently) 16-bit memory id where the lower 8 bits are the arena id, and the upper bits the block index. 
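The comment above describes the arena memory id abstractly; in the patch the arena id, block index, and exclusive flag are kept as separate fields of `mi_memid_t` (see the `mi_memid_create_arena` hunk that follows). Purely for illustration, a packed form of the described "lower 8 bits arena id, upper bits block index" layout could look like this (hypothetical helpers, not in the source):

#include <assert.h>
#include <stdint.h>

static inline uint16_t memid_pack(uint8_t arena_id, uint16_t block_index) {
  assert(block_index < (1u << 8));                 // must fit the upper 8 bits
  return (uint16_t)(((uint16_t)block_index << 8) | arena_id);
}
static inline uint8_t  memid_arena_id(uint16_t memid)    { return (uint8_t)(memid & 0xFF); }
static inline uint16_t memid_block_index(uint16_t memid) { return (uint16_t)(memid >> 8); }

int main(void) {
  const uint16_t id = memid_pack(3, 42);
  assert(memid_arena_id(id) == 3 && memid_block_index(id) == 42);
  return 0;
}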
@@ -142,7 +117,7 @@ static size_t mi_arena_size(mi_arena_t* arena) { } static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) { - mi_memid_t memid = mi_memid_create(MI_MEM_ARENA); + mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA); memid.mem.arena.id = id; memid.mem.arena.block_index = bitmap_index; memid.mem.arena.is_exclusive = is_exclusive; @@ -169,7 +144,7 @@ static uint8_t mi_arena_static[MI_ARENA_STATIC_MAX]; static _Atomic(size_t) mi_arena_static_top; static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) { - *memid = mi_memid_none(); + *memid = _mi_memid_none(); if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL; if (mi_atomic_load_relaxed(&mi_arena_static_top) >= MI_ARENA_STATIC_MAX) return NULL; @@ -186,7 +161,7 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m } // success - *memid = mi_memid_create(MI_MEM_STATIC); + *memid = _mi_memid_create(MI_MEM_STATIC); const size_t start = _mi_align_up(oldtop, alignment); uint8_t* const p = &mi_arena_static[start]; _mi_memzero(p, size); @@ -194,27 +169,22 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m } static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { - *memid = mi_memid_none(); + *memid = _mi_memid_none(); // try static void* p = mi_arena_static_zalloc(size, MI_ALIGNMENT_MAX, memid); if (p != NULL) return p; // or fall back to the OS - bool is_zero = false; - p = _mi_os_alloc(size, &is_zero, stats); - if (p != NULL) { - *memid = mi_memid_create_os(true); - if (!is_zero) { _mi_memzero_aligned(p, size); } - return p; - } - - return NULL; + return _mi_os_alloc(size, memid, stats); } static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_t* stats) { - if (memid.memkind == MI_MEM_OS) { - _mi_os_free(p, size, stats); + if (mi_memkind_is_os(memid.memkind)) { + _mi_os_free(p, size, memid, stats); + } + else { + mi_assert(memid.memkind == MI_MEM_STATIC); } } @@ -252,7 +222,7 @@ static mi_decl_noinline void* mi_arena_alloc_at(mi_arena_t* arena, size_t arena_ // claimed it! 
void* p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_index)); *memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index); - memid->is_pinned = (arena->is_large || !arena->allow_decommit); + memid->is_pinned = arena->memid.is_pinned; // none of the claimed blocks should be scheduled for a decommit if (arena->blocks_purge != NULL) { @@ -404,7 +374,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset { mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); - *memid = mi_memid_none(); + *memid = _mi_memid_none(); const int numa_node = _mi_os_numa_node(tld); // current numa node @@ -429,17 +399,12 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset } // finally, fall back to the OS - bool os_is_large = false; - bool os_is_zero = false; - void* p = _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, &os_is_large, &os_is_zero, tld->stats); - if (p != NULL) { - *memid = mi_memid_create_os(commit); - memid->is_pinned = os_is_large; - memid->was_zero = os_is_zero; - memid->mem.os.alignment = alignment; - memid->mem.os.align_offset = align_offset; + if (align_offset > 0) { + return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats); + } + else { + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats); } - return p; } void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) @@ -467,7 +432,7 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_purge != NULL); - mi_assert_internal(arena->allow_decommit); + mi_assert_internal(!arena->memid.is_pinned); const size_t size = mi_arena_block_size(blocks); void* const p = arena->start + mi_arena_block_size(mi_bitmap_index_bit(bitmap_idx)); bool needs_recommit; @@ -541,7 +506,7 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, // returns true if anything was purged static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats) { - if (!arena->allow_decommit || arena->blocks_purge == NULL) return false; + if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false; mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); if (expire == 0) return false; if (!force && expire > now) return false; @@ -631,18 +596,13 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi if (size==0) return; const bool all_committed = (committed_size == size); - if (memid.memkind == MI_MEM_OS) { + if (mi_memkind_is_os(memid.memkind)) { // was a direct OS allocation, pass through if (!all_committed && committed_size > 0) { // if partially committed, adjust the committed stats _mi_stat_decrease(&stats->committed, committed_size); } - if (memid.mem.os.align_offset != 0) { - _mi_os_free_aligned_at_offset(p, size, memid.mem.os.alignment, memid.mem.os.align_offset, all_committed, stats); - } - else { - _mi_os_free(p, size, stats); - } + _mi_os_free(p, size, memid, stats); } else if (memid.memkind == MI_MEM_ARENA) { // allocated in an arena @@ -669,7 +629,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi mi_track_mem_undefined(p,size); // 
potentially decommit - if (!arena->allow_decommit || arena->blocks_committed == NULL) { + if (arena->memid.is_pinned || arena->blocks_committed == NULL) { mi_assert_internal(all_committed); } else { @@ -717,14 +677,9 @@ static void mi_arenas_unsafe_destroy(void) { for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); if (arena != NULL) { - if (arena->owned && arena->start != NULL) { + if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) { mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); - if (arena->is_huge_alloc) { - _mi_os_free_huge_os_pages(arena->start, mi_arena_size(arena), &_mi_stats_main); - } - else { - _mi_os_free(arena->start, mi_arena_size(arena), &_mi_stats_main); - } + _mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main); } else { new_max_arena = i; @@ -784,21 +739,18 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) { return true; } -static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, bool is_large, bool is_huge_alloc, bool is_zero, int numa_node, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept +static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); if (size < MI_ARENA_BLOCK_SIZE) return false; if (is_large) { - mi_assert_internal(is_committed); - is_committed = true; + mi_assert_internal(memid.was_committed && memid.is_pinned); } - const bool allow_decommit = !is_large; // && !is_committed; // only allow decommit for initially uncommitted memory - const size_t bcount = size / MI_ARENA_BLOCK_SIZE; const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); - const size_t bitmaps = (allow_decommit ? 4 : 2); + const size_t bitmaps = (memid.is_pinned ? 2 : 4); const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); mi_memid_t meta_memid; mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS? @@ -807,8 +759,8 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, // already zero'd due to os_alloc // _mi_memzero(arena, asize); arena->id = _mi_arena_id_none(); + arena->memid = memid; arena->exclusive = exclusive; - arena->owned = owned; arena->meta_size = asize; arena->meta_memid = meta_memid; arena->block_count = bcount; @@ -816,16 +768,13 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, arena->start = (uint8_t*)start; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = is_large; - arena->is_huge_alloc= is_huge_alloc; - arena->is_zero_init = is_zero; - arena->allow_decommit = allow_decommit; arena->purge_expire = 0; arena->search_idx = 0; arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap - arena->blocks_committed = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap - arena->blocks_purge = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap + arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap + arena->blocks_purge = (arena->memid.is_pinned ? 
NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap // initialize committed bitmap? - if (arena->blocks_committed != NULL && is_committed) { + if (arena->blocks_committed != NULL && arena->memid.was_committed) { memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning } @@ -842,31 +791,28 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_committed, } bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { - return mi_manage_os_memory_ex2(start,size,is_committed,is_large,false,is_zero,numa_node,exclusive,false /* not owned */, arena_id); -} - - -// Reserve a range of regular OS memory -static int mi_reserve_os_memory_ex2(size_t size, bool commit, bool allow_large, bool exclusive, bool owned, mi_arena_id_t* arena_id) mi_attr_noexcept -{ - if (arena_id != NULL) *arena_id = _mi_arena_id_none(); - size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block - bool is_large = false; - bool is_zero = false; - void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &is_large, &is_zero, &_mi_stats_main); - if (start==NULL) return ENOMEM; - if (!mi_manage_os_memory_ex2(start, size, (is_large || commit), is_large, false, is_zero, -1, exclusive, owned, arena_id)) { - _mi_os_free_ex(start, size, commit, &_mi_stats_main); - _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024)); - return ENOMEM; - } - _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size,1024), is_large ? " (in large os pages)" : ""); - return 0; + mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); + memid.was_committed = is_committed; + memid.was_zero = is_zero; + memid.is_pinned = is_large; + return mi_manage_os_memory_ex2(start,size,is_large,numa_node,exclusive,memid, arena_id); } // Reserve a range of regular OS memory int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { - return mi_reserve_os_memory_ex2(size,commit,allow_large,exclusive,true /*owned*/, arena_id); + if (arena_id != NULL) *arena_id = _mi_arena_id_none(); + size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block + mi_memid_t memid; + void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &memid, &_mi_stats_main); + if (start == NULL) return ENOMEM; + const bool is_large = memid.is_pinned; // todo: use separate is_large field? + if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { + _mi_os_free_ex(start, size, commit, memid, &_mi_stats_main); + _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size, 1024)); + return ENOMEM; + } + _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? 
" (in large os pages)" : ""); + return 0; } @@ -925,16 +871,16 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - bool is_zero = false; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &is_zero); + mi_memid_t memid; + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages); return ENOMEM; } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory_ex2(p, hsize, true, true, true, is_zero, numa_node, exclusive, true /* owned */, arena_id)) { - _mi_os_free_huge_os_pages(p, hsize, &_mi_stats_main); + if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) { + _mi_os_free(p, hsize, memid, &_mi_stats_main); return ENOMEM; } return 0; diff --git a/src/init.c b/src/init.c index 78618627..cd2fd643 100644 --- a/src/init.c +++ b/src/init.c @@ -177,6 +177,7 @@ mi_heap_t* _mi_heap_main_get(void) { typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; + mi_memid_t memid; } mi_thread_data_t; @@ -205,15 +206,20 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) { // if that fails, allocate as meta data if (td == NULL) { - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &is_zero, &_mi_stats_main); + mi_memid_t memid; + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); if (td == NULL) { // if this fails, try once more. 
(issue #257) - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &is_zero, &_mi_stats_main); + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); if (td == NULL) { // really out of memory _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); } } + if (td != NULL) { + td->memid = memid; + is_zero = memid.was_zero; + } } if (td != NULL && !is_zero) { @@ -234,7 +240,7 @@ static void mi_thread_data_free( mi_thread_data_t* tdfree ) { } } // if that fails, just free it directly - _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main); + _mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid, &_mi_stats_main); } void _mi_thread_data_collect(void) { @@ -244,7 +250,7 @@ void _mi_thread_data_collect(void) { if (td != NULL) { td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); if (td != NULL) { - _mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main ); + _mi_os_free(td, sizeof(mi_thread_data_t), td->memid, &_mi_stats_main); } } } diff --git a/src/os.c b/src/os.c index cf53472f..589399a4 100644 --- a/src/os.c +++ b/src/os.c @@ -148,7 +148,9 @@ void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { Free memory -------------------------------------------------------------- */ -static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) { +static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats); + +static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_assert_internal((size % _mi_os_page_size()) == 0); if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) @@ -157,18 +159,34 @@ static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } mi_stats_t* stats = &_mi_stats_main; - if (was_committed) { _mi_stat_decrease(&stats->committed, size); } + if (still_committed) { _mi_stat_decrease(&stats->committed, size); } _mi_stat_decrease(&stats->reserved, size); } - -void _mi_os_free_ex(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) { - const size_t csize = _mi_os_good_alloc_size(size); - mi_os_mem_free(addr,csize,was_committed,tld_stats); +void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) { + size_t csize = _mi_os_good_alloc_size(size); + if (memid.memkind == MI_MEM_OS) { + if (memid.mem.os.base != NULL) { + mi_assert(memid.mem.os.base <= addr); + csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base); + mi_os_prim_free(memid.mem.os.base, csize, still_committed, tld_stats); + } + else { + mi_os_prim_free(addr, csize, still_committed, tld_stats); + } + } + else if (memid.memkind == MI_MEM_OS_HUGE) { + mi_assert(memid.is_pinned); + mi_os_free_huge_os_pages(addr, size, tld_stats); + } + else { + // nothing to do + mi_assert(memid.memkind <= MI_MEM_EXTERNAL); + } } -void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) { - _mi_os_free_ex(p, size, true, tld_stats); +void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) { + _mi_os_free_ex(p, size, true, memid, tld_stats); } @@ -177,7 +195,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) { -------------------------------------------------------------- */ // Note: the `try_alignment` is 
just a hint and the returned pointer is not guaranteed to be aligned. -static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { +static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(is_zero != NULL); mi_assert_internal(is_large != NULL); @@ -202,7 +220,7 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo // Primitive aligned allocation from the OS. // This function guarantees the allocated memory is aligned. -static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { +static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(is_large != NULL); @@ -212,19 +230,19 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size = _mi_align_up(size, _mi_os_page_size()); // try first with a hint (this will be aligned directly on Win 10+ or BSD) - void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats); + void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats); if (p == NULL) return NULL; // if not aligned, free it, overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); - mi_os_mem_free(p, size, commit, stats); + mi_os_prim_free(p, size, commit, stats); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocate block // over-allocate uncommitted (virtual) memory - p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats); + p = mi_os_prim_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats); if (p == NULL) return NULL; // set p to the aligned part in the full region @@ -239,7 +257,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, } else { // mmap can free inside an allocation // overallocate... - p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, is_zero, stats); + p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats); if (p == NULL) return NULL; // and selectively unmap parts around the over-allocated area. 
(noop on sbrk) void* aligned_p = mi_align_up_ptr(p, alignment); @@ -247,8 +265,8 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t mid_size = _mi_align_up(size, _mi_os_page_size()); size_t post_size = over_size - pre_size - mid_size; mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size); - if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); - if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); + if (pre_size > 0) mi_os_prim_free(p, pre_size, commit, stats); + if (post_size > 0) mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); // we can return the aligned pointer on `mmap` (and sbrk) systems p = aligned_p; } @@ -263,31 +281,38 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, OS API: alloc and alloc_aligned ----------------------------------------------------------- */ -void* _mi_os_alloc(size_t size, bool* is_zero, mi_stats_t* tld_stats) { +void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); + *memid = _mi_memid_none(); mi_stats_t* stats = &_mi_stats_main; if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); - bool is_large = false; - bool is_zerox = false; - void* p = mi_os_mem_alloc(size, 0, true, false, &is_large, &is_zerox, stats); - if (is_zero != NULL) { *is_zero = is_zerox; } + bool os_is_large = false; + bool os_is_zero = false; + void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats); + if (p != NULL) { + *memid = _mi_memid_create_os(true, os_is_zero); + memid->is_pinned = os_is_large; + } return p; } -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) { MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings MI_UNUSED(tld_stats); + *memid = _mi_memid_none(); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); bool os_is_large = false; bool os_is_zero = false; - void* p = mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &_mi_stats_main /*tld->stats*/ ); - if (is_large != NULL) { *is_large = os_is_large; } - if (is_zero != NULL) { *is_zero = os_is_zero; } + void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &_mi_stats_main /*tld->stats*/ ); + if (p != NULL) { + *memid = _mi_memid_create_os(commit, os_is_zero); + memid->is_pinned = os_is_large; + } return p; } @@ -299,22 +324,24 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo to use the actual start of the memory region. 
----------------------------------------------------------- */ -void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) { +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) { mi_assert(offset <= MI_SEGMENT_SIZE); mi_assert(offset <= size); mi_assert((alignment % _mi_os_page_size()) == 0); + *memid = _mi_memid_none(); if (offset > MI_SEGMENT_SIZE) return NULL; if (offset == 0) { // regular aligned allocation - return _mi_os_alloc_aligned(size, alignment, commit, allow_large, is_large, is_zero, tld_stats); + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld_stats); } else { // overallocate to align at an offset const size_t extra = _mi_align_up(offset, alignment) - offset; const size_t oversize = size + extra; - void* start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, is_large, is_zero, tld_stats); + void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats); if (start == NULL) return NULL; - void* p = (uint8_t*)start + extra; + memid->mem.os.base = start; + void* const p = (uint8_t*)start + extra; mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); // decommit the overallocation at the start if (commit && extra > _mi_os_page_size()) { @@ -324,14 +351,6 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offse } } -void _mi_os_free_aligned_at_offset(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) { - mi_assert(align_offset <= MI_SEGMENT_SIZE); - const size_t extra = _mi_align_up(align_offset, alignment) - align_offset; - void* start = (uint8_t*)p - extra; - _mi_os_free_ex(start, size + extra, was_committed, tld_stats); -} - - /* ----------------------------------------------------------- OS memory API: reset, commit, decommit, protect, unprotect. 
----------------------------------------------------------- */ @@ -535,7 +554,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { #endif // Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, bool* is_zero) { +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) { + *memid = _mi_memid_none(); if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; @@ -550,11 +570,11 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse bool all_zero = true; while (page < pages) { // allocate a page - bool is_zerox = false; + bool is_zero = false; void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); void* p = NULL; - int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zerox, &p); - if (!is_zerox) { all_zero = false; } + int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p); + if (!is_zero) { all_zero = false; } if (err != 0) { _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE); break; @@ -565,7 +585,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // no success, issue a warning and break if (p != NULL) { _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr); - _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); + mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main); } break; } @@ -593,17 +613,22 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) { *pages_reserved = page; } if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } - if (is_zero != NULL) { *is_zero = all_zero; } + if (page != 0) { + mi_assert(start != NULL); + *memid = _mi_memid_create_os(true, all_zero); + memid->memkind = MI_MEM_OS_HUGE; + memid->is_pinned = true; + } return (page == 0 ? NULL : start); } // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. -void _mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) { +static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) { if (p==NULL || size==0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { - _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); + mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats); size -= MI_HUGE_OS_PAGE_SIZE; base += MI_HUGE_OS_PAGE_SIZE; } From 4d976270ebc18a7968659dfe31f0435652bde4cd Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 18 Apr 2023 17:18:39 -0700 Subject: [PATCH 4/4] review is_zero flag --- include/mimalloc/internal.h | 2 +- include/mimalloc/types.h | 2 +- src/alloc.c | 11 ++++++++--- src/page.c | 13 ++++--------- src/prim/unix/prim.c | 2 +- test/test-stress.c | 2 ++ 6 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index b320f690..2140cadd 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -300,7 +300,7 @@ static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { } // Is memory zero initialized? 
-static inline bool mi_mem_is_zero(void* p, size_t size) { +static inline bool mi_mem_is_zero(const void* p, size_t size) { for (size_t i = 0; i < size; i++) { if (((uint8_t*)p)[i] != 0) return false; } diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 60408ed3..9851ef5e 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -285,7 +285,7 @@ typedef struct mi_page_s { uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` uint8_t segment_in_use:1; // `true` if the segment allocated this page uint8_t is_committed:1; // `true` if the page virtual memory is committed - uint8_t is_zero_init:1; // `true` if the page was zero initialized + uint8_t is_zero_init:1; // `true` if the page was initially zero initialized // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` diff --git a/src/alloc.c b/src/alloc.c index b17adf45..3edc0b51 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -46,12 +46,17 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz // zero the block? note: we need to zero the full block size (issue #63) if mi_unlikely(zero) { mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) - const size_t zsize = (page->is_zero ? sizeof(block->next) + MI_PADDING_SIZE : page->xblock_size); - _mi_memzero_aligned(block, zsize - MI_PADDING_SIZE); + if (page->is_zero) { + block->next = 0; + } + else { + mi_assert_internal(page->xblock_size >= MI_PADDING_SIZE); + _mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE); + } } #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN - if (!page->is_zero && !zero && !mi_page_is_huge(page)) { + if (!zero && !mi_page_is_huge(page)) { memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); } #elif (MI_SECURE!=0) diff --git a/src/page.c b/src/page.c index 3445e504..27c6fd09 100644 --- a/src/page.c +++ b/src/page.c @@ -639,11 +639,6 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) // enable the new free list page->capacity += (uint16_t)extend; mi_stat_increase(tld->stats.page_committed, extend * bsize); - - // extension into zero initialized memory preserves the zero'd free list - if (!page->is_zero_init) { - page->is_zero = false; - } mi_assert_expensive(mi_page_is_valid_init(page)); } @@ -667,12 +662,12 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); #endif - #if MI_DEBUG > 0 - page->is_zero = false; // ensure in debug mode we initialize with MI_DEBUG_UNINIT, see issue #501 - #else page->is_zero = page->is_zero_init; + #if MI_DEBUG>1 + if (page->is_zero_init) { + mi_mem_is_zero(page_start, page_size); + } #endif - mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); mi_assert_internal(page->used == 0); diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index de50f594..4349f578 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -441,7 +441,7 @@ int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bo int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); - *is_zero = true; + *is_zero = false; *addr = NULL; return ENOMEM; } 
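The alloc.c hunk in this patch replaces the combined `zsize` computation with an explicit branch: when the page is known to be zero, only the freelist link written into the block needs clearing. A reduced sketch of that branch (simplified stand-in types; padding handling omitted):

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

typedef struct block_s { struct block_s* next; } block_t;

static void zero_block(block_t* block, size_t block_size, bool page_is_zero) {
  if (page_is_zero) {
    block->next = NULL;             // rest of the block is already zero
  }
  else {
    memset(block, 0, block_size);   // must zero the full block size (issue #63)
  }
}

int main(void) {
  block_t* b = (block_t*)malloc(64);
  if (b != NULL) {
    b->next = b;                    // simulate the freelist link left in the block
    zero_block(b, 64, true);
    free(b);
  }
  return 0;
}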
diff --git a/test/test-stress.c b/test/test-stress.c
index 0d6d0bc9..0dccec0a 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -20,6 +20,7 @@ terms of the MIT license.
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
+#include <assert.h>
 
 // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
 //
@@ -106,6 +107,7 @@ static void* alloc_items(size_t items, random_t r) {
   uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t));
   if (p != NULL) {
     for (uintptr_t i = 0; i < items; i++) {
+      assert(p[i] == 0);
       p[i] = (items - i) ^ cookie;
     }
   }
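The assert added to `alloc_items` above checks the same property in isolation: memory returned by a zero-initializing allocator must read as zero before the test writes its cookie values. A stand-alone equivalent (using plain `calloc` in place of the test's `custom_calloc`):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int main(void) {
  const size_t items = 100;
  uintptr_t* p = (uintptr_t*)calloc(items, sizeof(uintptr_t));
  if (p != NULL) {
    for (size_t i = 0; i < items; i++) {
      assert(p[i] == 0);  // mirrors the assert added in test-stress.c
    }
    free(p);
  }
  return 0;
}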