From c7272afa9a477576369a89ad74cbba4375dafd05 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 7 Sep 2020 21:34:34 -0700 Subject: [PATCH] add mi_reserve_os_memory/mi_manage_os_memory; allow arena allocations to cross multiple bitmap fields --- ide/vs2019/mimalloc-test-stress.vcxproj | 4 +- include/mimalloc.h | 4 + src/arena.c | 109 +++++----- src/bitmap.inc.c | 252 +++++++++++++++++++++++- src/os.c | 4 +- test/main-override-static.c | 22 ++- test/test-stress.c | 3 +- 7 files changed, 340 insertions(+), 58 deletions(-) diff --git a/ide/vs2019/mimalloc-test-stress.vcxproj b/ide/vs2019/mimalloc-test-stress.vcxproj index afbb6666..ef7ab357 100644 --- a/ide/vs2019/mimalloc-test-stress.vcxproj +++ b/ide/vs2019/mimalloc-test-stress.vcxproj @@ -149,8 +149,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/include/mimalloc.h b/include/mimalloc.h index 5b3d13ed..2bcbf10e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -262,6 +262,10 @@ mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; + + // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; diff --git a/src/arena.c b/src/arena.c index 73a7e704..20a89445 100644 --- a/src/arena.c +++ b/src/arena.c @@ -30,12 +30,13 @@ of 256MiB in practice. 
#include "mimalloc-atomic.h" #include // memset +#include // ENOMEM #include "bitmap.inc.c" // atomic bitmap // os.c -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats); void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); void _mi_os_free(void* p, size_t size, mi_stats_t* stats); @@ -49,8 +50,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); ----------------------------------------------------------- */ #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_ARENA_BLOCK_SIZE (8*MI_SEGMENT_ALIGN) // 32MiB -#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 2GiB +#define MI_ARENA_BLOCK_SIZE (4*MI_SEGMENT_ALIGN) // 32MiB #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) @@ -104,16 +104,11 @@ static size_t mi_block_count_of_size(size_t size) { ----------------------------------------------------------- */ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { - const size_t fcount = arena->field_count; size_t idx = mi_atomic_load_acquire(&arena->search_idx); // start from last search - for (size_t visited = 0; visited < fcount; visited++, idx++) { - if (idx >= fcount) idx = 0; // wrap around - // try to atomically claim a range of bits - if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { - mi_atomic_store_release(&arena->search_idx, idx); // start search from here next time - return true; - } - } + if (mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { + mi_atomic_store_release(&arena->search_idx, idx); // start search from here next time + return true; + }; return false; } @@ -131,7 +126,7 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n // claimed it! set the dirty bits (todo: no need for an atomic op here?) 
void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); *memid = mi_arena_id_create(arena_index, bitmap_index); - *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + *is_zero = mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); *large = arena->is_large; if (arena->is_committed) { // always committed @@ -140,7 +135,7 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n else if (*commit) { // arena not committed as a whole, but commit requested: ensure commit now bool any_uncommitted; - mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); + mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { bool commit_zero; _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); @@ -149,7 +144,7 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n } else { // no need to commit, but check if already fully committed - *commit = mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + *commit = mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } return p; } @@ -166,7 +161,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. if (alignment <= MI_SEGMENT_ALIGN && - size <= MI_ARENA_MAX_OBJ_SIZE && + // size <= MI_ARENA_MAX_OBJ_SIZE && size >= MI_ARENA_MIN_OBJ_SIZE) { const size_t bcount = mi_block_count_of_size(size); @@ -202,7 +197,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; - return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); + return _mi_os_alloc_aligned(size, alignment, *commit, large, tld->stats); } void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) @@ -240,7 +235,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_s return; } const size_t blocks = mi_block_count_of_size(size); - bool ones = mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); + bool ones = mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!ones) { _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size); return; @@ -266,12 +261,59 @@ static bool mi_arena_add(mi_arena_t* arena) { return true; } +bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept +{ + const size_t bcount = mi_block_count_of_size(size); + const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); + const size_t bitmaps = (is_committed ? 3 : 2); + const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? + if (arena == NULL) return false; + + arena->block_count = bcount; + arena->field_count = fields; + arena->start = (uint8_t*)start; + arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1)
+ arena->is_large = is_large;
+ arena->is_zero_init = is_zero;
+ arena->is_committed = is_committed;
+ arena->search_idx = 0;
+ arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
+ arena->blocks_committed = (is_committed ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
+ // the bitmaps are already zero initialized due to os_alloc
+ // just claim leftover blocks if needed
+ ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
+ mi_assert_internal(post >= 0);
+ if (post > 0) {
+ // don't use leftover bits at the end
+ mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
+ mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
+ }
+
+ mi_arena_add(arena);
+ return true;
+}
+
+// Reserve a range of regular OS memory
+int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept
+{
+ size = _mi_os_good_alloc_size(size);
+ bool large = allow_large;
+ void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main);
+ if (start==NULL) return ENOMEM;
+ if (!mi_manage_os_memory(start, size, commit, large, true, -1)) {
+ _mi_os_free_ex(start, size, commit, &_mi_stats_main);
+ _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024));
+ return ENOMEM;
+ }
+ _mi_verbose_message("reserved %zu kb memory\n", _mi_divide_up(size,1024));
+ return 0;
+}
+
 /* -----------------------------------------------------------
 Reserve a huge page arena.
----------------------------------------------------------- */
-#include <errno.h> // ENOMEM
-
 // reserve at a specific numa node
 int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept {
 if (pages==0) return 0;
@@ -286,35 +328,10 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
 }
 _mi_verbose_message("numa node %i: reserved %zu gb huge pages (of the %zu gb requested)\n", numa_node, pages_reserved, pages);
- size_t bcount = mi_block_count_of_size(hsize);
- size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
- size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t));
- mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
- if (arena == NULL) {
+ if (!mi_manage_os_memory(p, hsize, true, true, true, numa_node)) {
 _mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
 return ENOMEM;
 }
- arena->block_count = bcount;
- arena->field_count = fields;
- arena->start = (uint8_t*)p;
- arena->numa_node = numa_node; // TODO: or get the current numa node if -1?
(now it allows anyone to allocate on -1) - arena->is_large = true; - arena->is_zero_init = true; - arena->is_committed = true; - arena->search_idx = 0; - arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap - arena->blocks_committed = NULL; - // the bitmaps are already zero initialized due to os_alloc - // just claim leftover blocks if needed - ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; - mi_assert_internal(post >= 0); - if (post > 0) { - // don't use leftover bits at the end - mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); - } - - mi_arena_add(arena); return 0; } diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 2d6df46e..739ab2b6 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -61,7 +61,9 @@ static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { // The bit mask for a given number of blocks at a specified bit index. static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); - if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; + mi_assert_internal(count > 0); + if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; + if (count == 0) return 0; return ((((uintptr_t)1 << count) - 1) << bitidx); } @@ -137,6 +139,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_f static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); + mi_assert_internal(count <= MI_BITMAP_FIELD_BITS); _Atomic(uintptr_t)* field = &bitmap[idx]; uintptr_t map = mi_atomic_load_relaxed(field); if (map==MI_BITMAP_FIELD_FULL) return false; // short cut @@ -185,10 +188,12 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx } -// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. -// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. -static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { - for (size_t idx = 0; idx < bitmap_fields; idx++) { +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. +static inline bool mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) idx = 0; // wrap if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } @@ -196,6 +201,13 @@ static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fi return false; } +// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. +static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) { + return mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx); +} + + // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. 
static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { @@ -244,4 +256,234 @@ static inline bool mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fi } +//-------------------------------------------------------------------------- +// the `_across` functions work on bitmaps where sequences can cross over +// between the fields. This is used in arena allocation +//-------------------------------------------------------------------------- + +// Try to atomically claim a sequence of `count` bits starting from the field +// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. +static inline bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx) +{ + mi_assert_internal(bitmap_idx != NULL); + + // check initial trailing zeros + _Atomic(uintptr_t)* field = &bitmap[idx]; + uintptr_t map = mi_atomic_load_relaxed(field); + const uintptr_t bitidx = (map==0 ? 0 : mi_bsr(map) + 1); + const size_t initial = MI_BITMAP_FIELD_BITS - bitidx; // count of initial zeros starting at idx + if (initial == 0) return false; + if (initial >= count) return mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields + if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries + + // scan ahead + size_t found = initial; + uintptr_t mask = 0; // mask bits for the final field + while(found < count) { + field++; + map = mi_atomic_load_relaxed(field); + const uintptr_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found)); + mask = mi_bitmap_mask_(mask_bits, 0); + if ((map & mask) != 0) return false; + found += mask_bits; + } + mi_assert_internal(field < &bitmap[bitmap_fields]); + + // found range of zeros up to the final field; mask contains mask in the final field + // now claim it atomically + _Atomic(uintptr_t)* const final_field = field; + const uintptr_t final_mask = mask; + _Atomic(uintptr_t)* const initial_field = &bitmap[idx]; + const uintptr_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial); + + // initial field + uintptr_t newmap; + field = initial_field; + map = mi_atomic_load_relaxed(field); + do { + newmap = map | initial_mask; + if ((map & initial_mask) != 0) { goto rollback; }; + } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); + + // intermediate fields + while (++field < final_field) { + newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); + map = 0; + if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; } + } + + // final field + mi_assert_internal(field == final_field); + map = mi_atomic_load_relaxed(field); + do { + newmap = map | final_mask; + if ((map & final_mask) != 0) { goto rollback; } + } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); + + // claimed! 
+ *bitmap_idx = mi_bitmap_index_create(idx, bitidx); + return true; + +rollback: + // roll back intermediate fields + while (--field > initial_field) { + newmap = 0; + map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); + mi_assert_internal(mi_atomic_load_relaxed(field) == map); + mi_atomic_store_release(field, newmap); + } + if (field == initial_field) { + map = mi_atomic_load_relaxed(field); + do { + mi_assert_internal((map & initial_mask) == initial_mask); + newmap = map & ~initial_mask; + } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); + } + // retry? (we make a recursive call instead of goto to be able to use const declarations) + if (retries < 4) { + return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx); + } + else { + return false; + } +} + + +// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. +static inline bool mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { + mi_assert_internal(count > 0); + if (count==1) return mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) idx = 0; // wrap + // try to claim inside the field + if (count <= MI_BITMAP_FIELD_BITS) { + if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { + return true; + } + } + // try to claim across fields + if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) { + return true; + } + } + return false; +} + +// Helper for masks across fields; returns the mid count, post_mask may be 0 +static inline size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, uintptr_t* pre_mask, uintptr_t* mid_mask, uintptr_t* post_mask) { + UNUSED_RELEASE(bitmap_fields); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + if (mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS)) { + *pre_mask = mi_bitmap_mask_(count, bitidx); + *mid_mask = 0; + *post_mask = 0; + mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields); + return 0; + } + else { + const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx; + mi_assert_internal(pre_bits < count); + *pre_mask = mi_bitmap_mask_(pre_bits, bitidx); + count -= pre_bits; + const size_t mid_count = (count / MI_BITMAP_FIELD_BITS); + *mid_mask = MI_BITMAP_FIELD_FULL; + count %= MI_BITMAP_FIELD_BITS; + *post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0)); + mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields); + return mid_count; + } +} + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously. 
+static inline bool mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + size_t idx = mi_bitmap_index_field(bitmap_idx); + uintptr_t pre_mask; + uintptr_t mid_mask; + uintptr_t post_mask; + size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); + bool all_one = true; + _Atomic(uintptr_t)*field = &bitmap[idx]; + uintptr_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); + if ((prev & pre_mask) != pre_mask) all_one = false; + while(mid_count-- > 0) { + prev = mi_atomic_and_acq_rel(field++, ~mid_mask); + if ((prev & mid_mask) != mid_mask) all_one = false; + } + if (post_mask!=0) { + prev = mi_atomic_and_acq_rel(field, ~post_mask); + if ((prev & post_mask) != post_mask) all_one = false; + } + return all_one; +} + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. +static inline bool mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) { + size_t idx = mi_bitmap_index_field(bitmap_idx); + uintptr_t pre_mask; + uintptr_t mid_mask; + uintptr_t post_mask; + size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); + bool all_zero = true; + bool any_zero = false; + _Atomic(uintptr_t)*field = &bitmap[idx]; + uintptr_t prev = mi_atomic_or_acq_rel(field++, pre_mask); + if ((prev & pre_mask) != 0) all_zero = false; + if ((prev & pre_mask) != pre_mask) any_zero = true; + while (mid_count-- > 0) { + prev = mi_atomic_or_acq_rel(field++, mid_mask); + if ((prev & mid_mask) != 0) all_zero = false; + if ((prev & mid_mask) != mid_mask) any_zero = true; + } + if (post_mask!=0) { + prev = mi_atomic_or_acq_rel(field, post_mask); + if ((prev & post_mask) != 0) all_zero = false; + if ((prev & post_mask) != post_mask) any_zero = true; + } + if (pany_zero != NULL) *pany_zero = any_zero; + return all_zero; +} + + +// Returns `true` if all `count` bits were 1. +// `any_ones` is `true` if there was at least one bit set to one. 
+static inline bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
+ size_t idx = mi_bitmap_index_field(bitmap_idx);
+ uintptr_t pre_mask;
+ uintptr_t mid_mask;
+ uintptr_t post_mask;
+ size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
+ bool all_ones = true;
+ bool any_ones = false;
+ _Atomic(uintptr_t)* field = &bitmap[idx];
+ uintptr_t prev = mi_atomic_load_relaxed(field++);
+ if ((prev & pre_mask) != pre_mask) all_ones = false;
+ if ((prev & pre_mask) != 0) any_ones = true;
+ while (mid_count-- > 0) {
+ prev = mi_atomic_load_relaxed(field++);
+ if ((prev & mid_mask) != mid_mask) all_ones = false;
+ if ((prev & mid_mask) != 0) any_ones = true;
+ }
+ if (post_mask!=0) {
+ prev = mi_atomic_load_relaxed(field);
+ if ((prev & post_mask) != post_mask) all_ones = false;
+ if ((prev & post_mask) != 0) any_ones = true;
+ }
+ if (pany_ones != NULL) *pany_ones = any_ones;
+ return all_ones;
+}
+
+static inline bool mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
+ return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL);
+}
+
+static inline bool mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
+ bool any_ones;
+ mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
+ return any_ones;
+}
+
 #endif
diff --git a/src/os.c b/src/os.c
index 6985587c..437b8f1a 100644
--- a/src/os.c
+++ b/src/os.c
@@ -623,9 +623,9 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats) {
 _mi_os_free_ex(p, size, true, stats);
 }
-void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld)
+void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats)
 {
- UNUSED(tld);
+ UNUSED(tld_stats);
 if (size == 0) return NULL;
 size = _mi_os_good_alloc_size(size);
 alignment = _mi_align_up(alignment, _mi_os_page_size());
diff --git a/test/main-override-static.c b/test/main-override-static.c
index 0b73bb61..6e4572f7 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -14,6 +14,7 @@ static void block_overflow1();
 static void invalid_free();
 static void test_aslr(void);
 static void test_process_info(void);
+static void test_reserved(void);
 int main() {
 mi_version();
@@ -24,7 +25,8 @@ int main() {
 // corrupt_free();
 // block_overflow1();
 // test_aslr();
- invalid_free();
+ // invalid_free();
+ // test_reserved();
 void* p1 = malloc(78);
 void* p2 = malloc(24);
@@ -147,4 +149,20 @@ static void test_process_info(void) {
 }
 mi_process_info(&user_msecs, &system_msecs, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults);
 printf("\n\n*** process info: user: %3zd.%03zd s, rss: %zd b, commit: %zd b\n\n", user_msecs/1000, user_msecs%1000, peak_rss, peak_commit);
-}
\ No newline at end of file
+}
+
+static void test_reserved(void) {
+#define KiB 1024ULL
+#define MiB (KiB*KiB)
+#define GiB (MiB*KiB)
+ mi_reserve_os_memory(4*GiB, false, true);
+ void* p1 = malloc(100);
+ void* p2 = malloc(100000);
+ void* p3 = malloc(2*GiB);
+ void* p4 = malloc(1*GiB + 100000);
+ free(p1);
+ free(p2);
+ free(p3);
+ p3 = malloc(1*GiB);
+ free(p4);
+}
diff --git a/test/test-stress.c b/test/test-stress.c
index 05dbd4b4..68faef90 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -217,7 +217,7 @@ static void
test_leak(void) { } #endif -int main(int argc, char** argv) { +int main(int argc, char** argv) { // > mimalloc-test-stress [THREADS] [SCALE] [ITER] if (argc >= 2) { char* end; @@ -235,6 +235,7 @@ int main(int argc, char** argv) { if (n > 0) ITER = n; } printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); + //mi_reserve_os_memory(512*1024*1024ULL, true, true); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res);
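
For reference, a minimal usage sketch of the two new entry points follows (illustrative only, not part of the patch; the 256 MiB reservation size, the 8 MiB allocation, and the error handling are assumptions, and `region`/`region_size` in the comment are hypothetical names):

#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  // Reserve 256 MiB of regular OS memory, eagerly committed; allow large OS pages if available.
  int err = mi_reserve_os_memory(256 * 1024 * 1024, /*commit*/ true, /*allow_large*/ true);
  if (err != 0) {
    fprintf(stderr, "mi_reserve_os_memory failed (error %d)\n", err);  // the patch returns ENOMEM on failure
    return 1;
  }
  // Subsequent segment-sized allocations can now be served from the reserved arena.
  void* p = mi_malloc(8 * 1024 * 1024);
  mi_free(p);

  // Alternatively, hand an already-mapped region to mimalloc as an arena
  // (-1 means the NUMA node is unknown):
  // bool ok = mi_manage_os_memory(region, region_size,
  //                               /*is_committed*/ true, /*is_large*/ false,
  //                               /*is_zero*/ false, /*numa_node*/ -1);
  return 0;
}

mi_reserve_os_memory aligns its own reservation to MI_SEGMENT_ALIGN; a region handed to mi_manage_os_memory should presumably be segment aligned as well so that arena blocks can back segments directly.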
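The new `_across` bitmap operations all rely on the same decomposition of a bit run into a partial first field, zero or more full middle fields, and an optional partial last field (mi_bitmap_mask_across). A small standalone sketch of that arithmetic, with FIELD_BITS standing in for MI_BITMAP_FIELD_BITS on a 64-bit build (illustrative, not code from the patch):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FIELD_BITS 64

// Same shape as mi_bitmap_mask_: `count` one-bits starting at `bitidx`.
static uint64_t mask(size_t count, size_t bitidx) {
  return (count >= FIELD_BITS) ? ~UINT64_C(0) : (((UINT64_C(1) << count) - 1) << bitidx);
}

int main(void) {
  size_t bitidx = 40, count = 100;          // spans 3 fields: 24 + 64 + 12 bits
  size_t pre_bits  = FIELD_BITS - bitidx;   // partial first field
  size_t mid_count = (count - pre_bits) / FIELD_BITS;  // full middle fields
  size_t post_bits = (count - pre_bits) % FIELD_BITS;  // partial last field (may be 0)
  printf("pre  mask: %016llx\n", (unsigned long long)mask(pre_bits, bitidx));
  printf("mid  fields: %zu (mask %016llx)\n", mid_count, (unsigned long long)mask(FIELD_BITS, 0));
  printf("post mask: %016llx\n", (unsigned long long)mask(post_bits, 0));
  assert(pre_bits + mid_count*FIELD_BITS + post_bits == count);
  return 0;
}

mi_bitmap_claim_across, mi_bitmap_unclaim_across, and mi_bitmap_is_claimedx_across in the patch walk the fields exactly in this order: one atomic op with the pre mask, mid_count ops with the full-field mask, and one with the post mask when it is non-zero.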