diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h
index c40b32f6..1d4063bb 100644
--- a/include/mimalloc/bits.h
+++ b/include/mimalloc/bits.h
@@ -13,9 +13,6 @@ terms of the MIT license. A copy of the license can be found in the file
 #ifndef MI_BITS_H
 #define MI_BITS_H
 
-#include <stddef.h>   // size_t
-#include <stdint.h>   // int64_t etc
-#include <stdbool.h>  // bool
 
 // ------------------------------------------------------
 // Size of a pointer.
@@ -93,7 +90,7 @@ typedef int32_t mi_ssize_t;
 #endif
 #endif
 
-#if MI_ARCH_X64 && defined(__AVX2__)
+#if (MI_ARCH_X86 || MI_ARCH_X64)
 #include <immintrin.h>
 #elif MI_ARCH_ARM64 && MI_OPT_SIMD
 #include <arm_neon.h>
@@ -123,20 +120,37 @@ typedef int32_t mi_ssize_t;
 #define MI_MAX_VABITS (32)
 #endif
 
+
 // use a flat page-map (or a 2-level one)
 #ifndef MI_PAGE_MAP_FLAT
-#if MI_MAX_VABITS <= 40 && !defined(__APPLE__)
+#if MI_MAX_VABITS <= 40 && !MI_SECURE && !defined(__APPLE__)
 #define MI_PAGE_MAP_FLAT 1
 #else
 #define MI_PAGE_MAP_FLAT 0
 #endif
 #endif
 
+#if MI_PAGE_MAP_FLAT && MI_SECURE
+#error should not use MI_PAGE_MAP_FLAT with a secure build
+#endif
+
 
 /* --------------------------------------------------------------------------------
   Builtin's
 -------------------------------------------------------------------------------- */
 
+#if defined(__GNUC__) || defined(__clang__)
+#define mi_unlikely(x) (__builtin_expect(!!(x),false))
+#define mi_likely(x)   (__builtin_expect(!!(x),true))
+#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
+#define mi_unlikely(x) (x) [[unlikely]]
+#define mi_likely(x)   (x) [[likely]]
+#else
+#define mi_unlikely(x) (x)
+#define mi_likely(x)   (x)
+#endif
+
+
 #ifndef __has_builtin
 #define __has_builtin(x) 0
 #endif
@@ -174,17 +188,28 @@
 -------------------------------------------------------------------------------- */
 
 size_t _mi_popcount_generic(size_t x);
+extern bool _mi_cpu_has_popcnt;
 
 static inline size_t mi_popcount(size_t x) {
-  #if mi_has_builtinz(popcount)
+  #if defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
+  #if !defined(__BMI1__)
+  if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); }
+  #endif
+  size_t r;
+  __asm ("popcnt\t%1,%0" : "=r"(r) : "r"(x) : "cc");
+  return r;
+  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86)
+  #if !defined(__BMI1__)
+  if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); }
+  #endif
+  return (size_t)mi_msc_builtinz(__popcnt)(x);
+  #elif defined(_MSC_VER) && MI_ARCH_ARM64
+  return (size_t)mi_msc_builtinz(__popcnt)(x);
+  #elif mi_has_builtinz(popcount)
   return mi_builtinz(popcount)(x);
-  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
-  return mi_msc_builtinz(__popcnt)(x);
-  #elif MI_ARCH_X64 && defined(__BMI1__)
-  return (size_t)_mm_popcnt_u64(x);
   #else
   #define MI_HAS_FAST_POPCOUNT 0
-  return (x<=1 ? x : _mi_popcount_generic(x));
+  return _mi_popcount_generic(x);
   #endif
 }
 
@@ -198,19 +223,29 @@ size_t _mi_clz_generic(size_t x);
 size_t _mi_ctz_generic(size_t x);
 
 static inline size_t mi_ctz(size_t x) {
-  #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 tzcnt is defined for 0
+  #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
   size_t r;
   __asm ("tzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
   return r;
-  #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
-  return _tzcnt_u64(x);
-  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
-  unsigned long idx;
-  return (mi_msc_builtinz(_BitScanForward)(&idx, x) ? (size_t)idx : MI_SIZE_BITS);
+  #elif defined(__GNUC__) && MI_ARCH_X64
+  // tzcnt is interpreted as bsf if BMI1 is not supported (pre-haswell)
+  // if the argument is zero:
+  // - tzcnt: sets carry-flag, and returns MI_SIZE_BITS
+  // - bsf  : sets zero-flag, and leaves the destination _unmodified_ (on both AMD and Intel now, see )
+  // so we always initialize r to MI_SIZE_BITS to work correctly on all cpu's without branching
+  size_t r = MI_SIZE_BITS;
+  __asm ("tzcnt\t%1, %0" : "+r"(r) : "r"(x) : "cc");  // use '+r' to keep the assignment to r in case this becomes bsf on older cpu's
+  return r;
   #elif mi_has_builtinz(ctz)
   return (x!=0 ? (size_t)mi_builtinz(ctz)(x) : MI_SIZE_BITS);
-  #elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
-  size_t r = MI_SIZE_BITS; // bsf leaves destination unmodified if the argument is 0 (see )
+  #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
+  return (x!=0 ? _tzcnt_u64(x) : MI_SIZE_BITS);  // ensure it still works on non-BMI1 cpu's as well
+  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+  if (x==0) return MI_SIZE_BITS;  // test explicitly for `x==0` to avoid codegen bug (issue #1071)
+  unsigned long idx; mi_msc_builtinz(_BitScanForward)(&idx, x);
+  return (size_t)idx;
+  #elif defined(__GNUC__) && MI_ARCH_X86
+  size_t r = MI_SIZE_BITS;
   __asm ("bsf\t%1, %0" : "+r"(r) : "r"(x) : "cc");
   return r;
   #elif MI_HAS_FAST_POPCOUNT
@@ -222,17 +257,18 @@ static inline size_t mi_ctz(size_t x) {
 }
 
 static inline size_t mi_clz(size_t x) {
+  // we don't optimize anymore to lzcnt as there are still non BMI1 cpu's around (like Intel Celeron, see issue #1016)
+  // on pre-haswell cpu's lzcnt gets executed as bsr which is not equivalent (as it returns the bit position)
   #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 lzcnt is defined for 0
   size_t r;
   __asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
   return r;
-  #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
-  return _lzcnt_u64(x);
-  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
-  unsigned long idx;
-  return (mi_msc_builtinz(_BitScanReverse)(&idx, x) ? MI_SIZE_BITS - 1 - (size_t)idx : MI_SIZE_BITS);
   #elif mi_has_builtinz(clz)
   return (x!=0 ? (size_t)mi_builtinz(clz)(x) : MI_SIZE_BITS);
+  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+  if (x==0) return MI_SIZE_BITS;  // test explicitly for `x==0` to avoid codegen bug (issue #1071)
+  unsigned long idx; mi_msc_builtinz(_BitScanReverse)(&idx, x);
+  return (MI_SIZE_BITS - 1 - (size_t)idx);
   #elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
   if (x==0) return MI_SIZE_BITS;
   size_t r;
@@ -261,9 +297,11 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
   bool is_zero;
   __asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" );
   return !is_zero;
-  #elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
-  unsigned long i;
-  return (mi_msc_builtinz(_BitScanForward)(&i, x) ? (*idx = (size_t)i, true) : false);
+  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+  if (x==0) return false;  // test explicitly for `x==0` to avoid codegen bug (issue #1071)
+  unsigned long i; mi_msc_builtinz(_BitScanForward)(&i, x);
+  *idx = (size_t)i;
+  return true;
   #else
   return (x!=0 ? (*idx = mi_ctz(x), true) : false);
   #endif
@@ -273,14 +311,11 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
 // return false if `x==0` (with `*idx` undefined) and true otherwise,
 // with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
 static inline bool mi_bsr(size_t x, size_t* idx) {
-  #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9)
-  // on x64 the carry flag is set on zero which gives better codegen
-  bool is_zero;
-  __asm ("lzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc");
-  return !is_zero;
-  #elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
-  unsigned long i;
-  return (mi_msc_builtinz(_BitScanReverse)(&i, x) ? (*idx = (size_t)i, true) : false);
+  #if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+  if (x==0) return false;  // test explicitly for `x==0` to avoid codegen bug (issue #1071)
+  unsigned long i; mi_msc_builtinz(_BitScanReverse)(&i, x);
+  *idx = (size_t)i;
+  return true;
   #else
   return (x!=0 ? (*idx = MI_SIZE_BITS - 1 - mi_clz(x), true) : false);
   #endif
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 46274567..62387dc3 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -223,7 +223,7 @@ void* _mi_arenas_alloc_aligned(mi_subproc_t* subproc, size_t size, size_
 void _mi_arenas_free(void* p, size_t size, mi_memid_t memid);
 bool _mi_arenas_contain(const void* p);
 void _mi_arenas_collect(bool force_purge, bool visit_all, mi_tld_t* tld);
-void _mi_arenas_unsafe_destroy_all(mi_subproc_t* subproc);
+void _mi_arenas_unsafe_destroy_all(mi_tld_t* tld);
 
 mi_page_t* _mi_arenas_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment);
 void _mi_arenas_page_free(mi_page_t* page, mi_tld_t* tld);
@@ -238,7 +238,7 @@ bool _mi_meta_is_meta_page(void* p);
 
 // "page-map.c"
 bool _mi_page_map_init(void);
-mi_decl_nodiscard bool _mi_page_map_register(mi_page_t* page);
+void _mi_page_map_register(mi_page_t* page);
 void _mi_page_map_unregister(mi_page_t* page);
 void _mi_page_map_unregister_range(void* start, size_t size);
 mi_page_t* _mi_safe_ptr_page(const void* p);
@@ -604,8 +604,7 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) {
 #define MI_PAGE_MAP_SHIFT (MI_MAX_VABITS - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT)
 #define MI_PAGE_MAP_COUNT (MI_ZU(1) << MI_PAGE_MAP_SHIFT)
 
-typedef mi_page_t** mi_submap_t;
-extern mi_decl_hidden _Atomic(mi_submap_t)* _mi_page_map;
+extern mi_decl_hidden _Atomic(mi_page_t**)* _mi_page_map;
 
 static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) {
   const size_t u = (size_t)((uintptr_t)p / MI_ARENA_SLICE_SIZE);
@@ -613,7 +612,7 @@
   return (u / MI_PAGE_MAP_SUB_COUNT);
 }
 
-static inline mi_submap_t _mi_page_map_at(size_t idx) {
+static inline mi_page_t** _mi_page_map_at(size_t idx) {
   return mi_atomic_load_ptr_relaxed(mi_page_t*, &_mi_page_map[idx]);
 }
@@ -626,7 +625,7 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) {
 static inline mi_page_t* _mi_checked_ptr_page(const void* p) {
   size_t sub_idx;
   const size_t idx = _mi_page_map_index(p, &sub_idx);
-  mi_submap_t const sub = _mi_page_map_at(idx);
+  mi_page_t** const sub = _mi_page_map_at(idx);
   if mi_unlikely(sub == NULL) return NULL;
   return sub[sub_idx];
 }
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index b4e0cae6..71b2c93b 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -22,7 +22,6 @@ terms of the MIT license. A copy of the license can be found in the file
 #include
 #include <stddef.h>   // ptrdiff_t
 #include <stdint.h>   // uintptr_t, uint16_t, etc
-#include <stdbool.h>  // bool
 #include <limits.h>   // SIZE_MAX etc.
 #include <errno.h>    // error codes
 #include "bits.h"     // size defines (MI_INTPTR_SIZE etc), bit operations
diff --git a/src/arena.c b/src/arena.c
index a95248a7..b26f4442 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -684,7 +684,7 @@ static mi_page_t* mi_arenas_page_alloc_fresh(size_t slice_count, size_t block_si
     commit_size = _mi_align_up(block_start + block_size, MI_PAGE_MIN_COMMIT_SIZE);
     if (commit_size > page_noguard_size) { commit_size = page_noguard_size; }
     bool is_zero;
-    if mi_unlikely(!mi_arena_commit( mi_memid_arena(memid), page, commit_size, &is_zero, 0)) {
+    if (!mi_arena_commit( mi_memid_arena(memid), page, commit_size, &is_zero, 0)) {
       _mi_arenas_free(page, alloc_size, memid);
       return NULL;
     }
@@ -710,10 +707,7 @@ static mi_page_t* mi_arenas_page_alloc_fresh(size_t slice_count, size_t block_si
   mi_page_try_claim_ownership(page);
 
   // register in the page map
-  if mi_unlikely(!_mi_page_map_register(page)) {
-    _mi_arenas_free( page, alloc_size, memid );
-    return NULL;
-  }
+  _mi_page_map_register(page);
 
   // stats
   mi_tld_stat_increase(tld, pages, 1);
@@ -993,7 +990,7 @@ void _mi_arenas_page_unabandon(mi_page_t* page) {
   Arena free
 ----------------------------------------------------------- */
 static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slices);
-static void mi_arenas_try_purge(bool force, bool visit_all, mi_subproc_t* subproc, size_t tseq);
+static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld);
 
 void _mi_arenas_free(void* p, size_t size, mi_memid_t memid) {
   if (p==NULL) return;
@@ -1054,7 +1051,7 @@ void _mi_arenas_free(void* p, size_t size, mi_memid_t memid) {
 
 // Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired
 void _mi_arenas_collect(bool force_purge, bool visit_all, mi_tld_t* tld) {
-  mi_arenas_try_purge(force_purge, visit_all, tld->subproc, tld->thread_seq);
+  mi_arenas_try_purge(force_purge, visit_all, tld);
 }
 
@@ -1088,8 +1085,9 @@ bool _mi_arenas_contain(const void* p) {
 // for dynamic libraries that are unloaded and need to release all their allocated memory.
 static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) {
   mi_assert_internal(subproc != NULL);
-  const size_t arena_count = mi_arenas_get_count(subproc);
-  for (size_t i = 0; i < arena_count; i++) {
+  const size_t max_arena = mi_arenas_get_count(subproc);
+  size_t new_max_arena = 0;
+  for (size_t i = 0; i < max_arena; i++) {
     mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]);
     if (arena != NULL) {
       // mi_lock_done(&arena->abandoned_visit_lock);
@@ -1099,17 +1097,18 @@ static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) {
       }
     }
   }
+
   // try to lower the max arena.
-  size_t expected = arena_count;
-  mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, 0);
+  size_t expected = max_arena;
+  mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, new_max_arena);
 }
 
 
 // destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
 // for dynamic libraries that are unloaded and need to release all their allocated memory.
-void _mi_arenas_unsafe_destroy_all(mi_subproc_t* subproc) {
-  mi_arenas_unsafe_destroy(subproc);
-  // mi_arenas_try_purge(true /* force purge */, true /* visit all*/, subproc, 0 /* thread seq */); // purge non-owned arenas
+void _mi_arenas_unsafe_destroy_all(mi_tld_t* tld) {
+  mi_arenas_unsafe_destroy(tld->subproc);
+  _mi_arenas_collect(true /* force purge */, true /* visit all*/, tld);  // purge non-owned arenas
 }
 
@@ -1773,13 +1772,14 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force)
 }
 
 
-static void mi_arenas_try_purge(bool force, bool visit_all, mi_subproc_t* subproc, size_t tseq)
+static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld)
 {
   // try purge can be called often so try to only run when needed
   const long delay = mi_arena_purge_delay();
   if (_mi_preloading() || delay <= 0) return;  // nothing will be scheduled
 
   // check if any arena needs purging?
+  mi_subproc_t* subproc = tld->subproc;
   const mi_msecs_t now = _mi_clock_now();
   const mi_msecs_t arenas_expire = mi_atomic_loadi64_acquire(&subproc->purge_expire);
   if (!visit_all && !force && (arenas_expire == 0 || arenas_expire > now)) return;
@@ -1793,7 +1793,7 @@ static void mi_arenas_try_purge(bool force, bool visit_all, mi_subproc_t* subpro
   {
     // increase global expire: at most one purge per delay cycle
     if (arenas_expire > now) { mi_atomic_storei64_release(&subproc->purge_expire, now + (delay/10)); }
-    const size_t arena_start = tseq % max_arena;
+    const size_t arena_start = tld->thread_seq % max_arena;
     size_t max_purge_count = (visit_all ? max_arena : (max_arena/4)+1);
     bool all_visited = true;
     bool any_purged = false;
@@ -1894,12 +1894,12 @@ static bool mi_arena_page_register(size_t slice_index, size_t slice_count, mi_ar
   mi_assert_internal(slice_count == 1);
   mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
   mi_assert_internal(mi_bitmap_is_setN(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index, 1));
-  if (!_mi_page_map_register(page)) return false;  // break
+  _mi_page_map_register(page);
   mi_assert_internal(_mi_ptr_page(page)==page);
   return true;
 }
 
-mi_decl_nodiscard static bool mi_arena_pages_reregister(mi_arena_t* arena) {
+static bool mi_arena_pages_reregister(mi_arena_t* arena) {
   return _mi_bitmap_forall_set(arena->pages, &mi_arena_page_register, arena, NULL);
 }
@@ -1979,10 +1979,7 @@ mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_commit_fun_t* c
   if (!mi_arenas_add(arena->subproc, arena, arena_id)) {
     return false;
   }
-  if (!mi_arena_pages_reregister(arena)) {
-    // todo: clear arena entry in the subproc?
-    return false;
-  }
+  mi_arena_pages_reregister(arena);
 
   // adjust abandoned page count
   for (size_t bin = 0; bin < MI_BIN_COUNT; bin++) {
diff --git a/src/init.c b/src/init.c
index eb508bc9..73847a4b 100644
--- a/src/init.c
+++ b/src/init.c
@@ -711,7 +711,7 @@ void mi_process_init(void) mi_attr_noexcept {
   mi_detect_cpu_features();
   _mi_stats_init();
   _mi_os_init();
-  _mi_page_map_init();  // this could fail.. should we abort in that case?
+  _mi_page_map_init();
   mi_heap_main_init();
   mi_tld_main_init();
   // the following two can potentially allocate (on freeBSD for locks and thread keys)
@@ -778,7 +778,7 @@ void mi_cdecl mi_process_done(void) mi_attr_noexcept {
   if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
     mi_heap_collect(heap, true /* force */);
     _mi_heap_unsafe_destroy_all(heap);  // forcefully release all memory held by all heaps (of this thread only!)
-    _mi_arenas_unsafe_destroy_all(_mi_subproc_main());
+    _mi_arenas_unsafe_destroy_all(heap->tld);
     _mi_page_map_unsafe_destroy(_mi_subproc_main());
   }
   //_mi_page_map_unsafe_destroy(_mi_subproc_main());
diff --git a/src/page-map.c b/src/page-map.c
index 08ded252..ce70495b 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "bitmap.h"
 
 static void mi_page_map_cannot_commit(void) {
-  _mi_warning_message("unable to commit the allocation page-map on-demand\n" );
+  _mi_error_message(EFAULT,"unable to commit memory for the page address map\n");
 }
 
 #if MI_PAGE_MAP_FLAT
@@ -32,7 +32,7 @@ static mi_memid_t mi_page_map_memid;
 #define MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT MI_ARENA_SLICE_SIZE
 static mi_bitmap_t* mi_page_map_commit;  // one bit per committed 64 KiB entries
 
-mi_decl_nodiscard static bool mi_page_map_ensure_committed(size_t idx, size_t slice_count);
+static void mi_page_map_ensure_committed(size_t idx, size_t slice_count);
 
 bool _mi_page_map_init(void) {
   size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS);
@@ -71,10 +71,7 @@ bool _mi_page_map_init(void) {
 
   // commit the first part so NULL pointers get resolved without an access violation
   if (!commit) {
-    if (!mi_page_map_ensure_committed(0, 1)) {
-      mi_page_map_cannot_commit();
-      return false;
-    }
+    mi_page_map_ensure_committed(0, 1);
   }
   _mi_page_map[0] = 1;  // so _mi_ptr_page(NULL) == NULL
   mi_assert_internal(_mi_ptr_page(NULL)==NULL);
@@ -93,7 +90,7 @@ void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) {
 }
 
 
-static bool mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
+static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
   // is the page map area that contains the page address committed?
   // we always set the commit bits so we can track what ranges are in-use.
   // we only actually commit if the map wasn't committed fully already.
@@ -106,11 +103,8 @@ static bool mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
       bool is_zero;
       uint8_t* const start = _mi_page_map + (i * MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT);
       const size_t size = MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT;
-      if (!_mi_os_commit(start, size, &is_zero)) {
-        mi_page_map_cannot_commit();
-        return false;
-      }
-      if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start, size); }
+      if (!_mi_os_commit(start, size, &is_zero)) return;
+      if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start, size); }
       mi_bitmap_set(mi_page_map_commit, i);
     }
   }
@@ -119,7 +113,6 @@ static bool mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
   _mi_page_map[idx] = 0;
   _mi_page_map[idx+slice_count-1] = 0;
   #endif
-  return true;
 }
 
@@ -131,28 +124,25 @@ static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t*
   return _mi_page_map_index(page);
 }
 
-bool _mi_page_map_register(mi_page_t* page) {
+void _mi_page_map_register(mi_page_t* page) {
   mi_assert_internal(page != NULL);
   mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
   mi_assert_internal(_mi_page_map != NULL);  // should be initialized before multi-thread access!
   if mi_unlikely(_mi_page_map == NULL) {
-    if (!_mi_page_map_init()) return false;
+    if (!_mi_page_map_init()) return;
   }
   mi_assert(_mi_page_map!=NULL);
   uint8_t* page_start;
   size_t slice_count;
   const size_t idx = mi_page_map_get_idx(page, &page_start, &slice_count);
-  if (!mi_page_map_ensure_committed(idx, slice_count)) {
-    return false;
-  }
+  mi_page_map_ensure_committed(idx, slice_count);
 
   // set the offsets
   for (size_t i = 0; i < slice_count; i++) {
     mi_assert_internal(i < 128);
     _mi_page_map[idx + i] = (uint8_t)(i+1);
   }
-  return true;
 }
 
 void _mi_page_map_unregister(mi_page_t* page) {
@@ -168,10 +158,7 @@ void _mi_page_map_unregister(mi_page_t* page) {
 void _mi_page_map_unregister_range(void* start, size_t size) {
   const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE);
   const uintptr_t index = _mi_page_map_index(start);
-  // todo: scan the commit bits and clear only those ranges?
-  if (!mi_page_map_ensure_committed(index, slice_count)) {  // we commit the range in total;
-    return;
-  }
+  mi_page_map_ensure_committed(index, slice_count);  // we commit the range in total; todo: scan the commit bits and clear only those ranges?
   _mi_memzero(&_mi_page_map[index], slice_count);
 }
@@ -192,10 +179,9 @@ mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_att
 #else
 
 // A 2-level page map
-#define MI_PAGE_MAP_SUB_SIZE (MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*))
-#define MI_PAGE_MAP_ENTRIES_PER_CBIT (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS)
+#define MI_PAGE_MAP_SUB_SIZE (MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*))
 
-mi_decl_cache_align _Atomic(mi_submap_t)* _mi_page_map;
+mi_decl_cache_align _Atomic(mi_page_t**)* _mi_page_map;
 static size_t mi_page_map_count;
 static void* mi_page_map_max_address;
 static mi_memid_t mi_page_map_memid;
@@ -203,31 +189,26 @@ static mi_memid_t mi_page_map_memid;
 
 // divide the main map in 64 (`MI_BFIELD_BITS`) parts commit those parts on demand
 static _Atomic(mi_bfield_t) mi_page_map_commit;
 
-mi_decl_nodiscard static bool mi_page_map_ensure_committed(size_t idx, mi_submap_t* submap);
-mi_decl_nodiscard static bool mi_page_map_ensure_submap_at(size_t idx, mi_submap_t* submap);
-static bool mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count);
+#define MI_PAGE_MAP_ENTRIES_PER_CBIT (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS)
 
 static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) {
   mi_bfield_t commit = mi_atomic_load_relaxed(&mi_page_map_commit);
-  const size_t bit_idx = idx/MI_PAGE_MAP_ENTRIES_PER_CBIT;
+  const size_t bit_idx = idx/MI_PAGE_MAP_ENTRIES_PER_CBIT;  mi_assert_internal(bit_idx < MI_BFIELD_BITS);
   if (pbit_idx != NULL) { *pbit_idx = bit_idx; }
   return ((commit & (MI_ZU(1) << bit_idx)) != 0);
 }
 
-static bool mi_page_map_ensure_committed(size_t idx, mi_submap_t* submap) {
-  mi_assert_internal(submap!=NULL && *submap==NULL);
+static mi_page_t** mi_page_map_ensure_committed(size_t idx) {
   size_t bit_idx;
   if mi_unlikely(!mi_page_map_is_committed(idx, &bit_idx)) {
     uint8_t* start = (uint8_t*)&_mi_page_map[bit_idx * MI_PAGE_MAP_ENTRIES_PER_CBIT];
-    if (!_mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_submap_t), NULL)) {
-      mi_page_map_cannot_commit();
-      return false;
+    if (!_mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_page_t**), NULL)) {
+      return NULL;
     }
     mi_atomic_or_acq_rel(&mi_page_map_commit, MI_ZU(1) << bit_idx);
   }
-  *submap = mi_atomic_load_ptr_acquire(mi_page_t*, &_mi_page_map[idx]);  // acquire _mi_page_map_at(idx);
-  return true;
+  return mi_atomic_load_ptr_acquire(mi_page_t*, &_mi_page_map[idx]);  // _mi_page_map_at(idx);
 }
 
 // initialize the page map
@@ -277,11 +258,7 @@ bool _mi_page_map_init(void) {
   if (!mi_page_map_memid.initially_zero) {  // initialize low addresses with NULL
     _mi_memzero_aligned(sub0, submap_size);
   }
-  mi_submap_t nullsub = NULL;
-  if (!mi_page_map_ensure_committed(0,&nullsub)) {
-    mi_page_map_cannot_commit();
-    return false;
-  }
+  mi_page_map_ensure_committed(0);
   mi_atomic_store_ptr_release(mi_page_t*, &_mi_page_map[0], sub0);
 
   mi_assert_internal(_mi_ptr_page(NULL)==NULL);
@@ -296,7 +273,7 @@ void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) {
   for (size_t idx = 1; idx < mi_page_map_count; idx++) {  // skip entry 0 (as we allocate that submap at the end of the page_map)
     // free all sub-maps
     if (mi_page_map_is_committed(idx, NULL)) {
-      mi_submap_t sub = _mi_page_map_at(idx);
+      mi_page_t** sub = _mi_page_map_at(idx);
       if (sub != NULL) {
         mi_memid_t memid = _mi_memid_create_os(sub, MI_PAGE_MAP_SUB_SIZE, true, false, false);
         _mi_os_free_ex(memid.mem.os.base, memid.mem.os.size, true, memid, subproc);
@@ -313,62 +290,41 @@ void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) {
 }
 
 
-static bool mi_page_map_ensure_submap_at(size_t idx, mi_submap_t* submap) {
-  mi_assert_internal(submap!=NULL && *submap==NULL);
-  mi_submap_t sub = NULL;
-  if (!mi_page_map_ensure_committed(idx, &sub)) {
-    return false;
-  }
+static mi_page_t** mi_page_map_ensure_submap_at(size_t idx) {
+  mi_page_t** sub = mi_page_map_ensure_committed(idx);
   if mi_unlikely(sub == NULL) {
     // sub map not yet allocated, alloc now
     mi_memid_t memid;
     mi_page_t** expect = sub;
     const size_t submap_size = MI_PAGE_MAP_SUB_SIZE;
-    sub = (mi_submap_t)_mi_os_zalloc(submap_size, &memid);
-    if (sub==NULL) {
-      _mi_warning_message("internal error: unable to extend the page map\n");
-      return false;
+    sub = (mi_page_t**)_mi_os_zalloc(submap_size, &memid);
+    if (sub == NULL) {
+      _mi_error_message(EFAULT, "internal error: unable to extend the page map\n");
+      return NULL;
     }
     if (!mi_atomic_cas_ptr_strong_acq_rel(mi_page_t*, &_mi_page_map[idx], &expect, sub)) {
      // another thread already allocated it.. free and continue
      _mi_os_free(sub, submap_size, memid);
-     sub = expect;
+     sub = expect;
+     mi_assert_internal(sub!=NULL);
    }
-  }
-  mi_assert_internal(sub!=NULL);
-  *submap = sub;
-  return true;
+  }
+  return sub;
 }
 
-static bool mi_page_map_set_range_prim(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
+static void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
   // is the page map area that contains the page address committed?
   while (slice_count > 0) {
-    mi_submap_t sub = NULL;
-    if (!mi_page_map_ensure_submap_at(idx, &sub)) {
-      return false;
-    };
-    mi_assert_internal(sub!=NULL);
+    mi_page_t** sub = mi_page_map_ensure_submap_at(idx);
     // set the offsets for the page
-    while (slice_count > 0 && sub_idx < MI_PAGE_MAP_SUB_COUNT) {
+    while (sub_idx < MI_PAGE_MAP_SUB_COUNT) {
       sub[sub_idx] = page;
-      slice_count--;
-      sub_idx++;
+      slice_count--; if (slice_count == 0) return;
+      sub_idx++;
     }
     idx++;  // potentially wrap around to the next idx
     sub_idx = 0;
   }
-  return true;
-}
-
-static bool mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
-  if mi_unlikely(!mi_page_map_set_range_prim(page,idx,sub_idx,slice_count)) {
-    // failed to commit, call again to reset the page pointer if needed
-    if (page!=NULL) {
-      mi_page_map_set_range_prim(NULL,idx,sub_idx,slice_count);
-    }
-    return false;
-  }
-  return true;
 }
 
 static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slice_count) {
@@ -379,18 +335,18 @@ static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slic
   return _mi_page_map_index(page, sub_idx);
 }
 
-bool _mi_page_map_register(mi_page_t* page) {
+void _mi_page_map_register(mi_page_t* page) {
   mi_assert_internal(page != NULL);
   mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
   mi_assert_internal(_mi_page_map != NULL);  // should be initialized before multi-thread access!
   if mi_unlikely(_mi_page_map == NULL) {
-    if (!_mi_page_map_init()) return false;
+    if (!_mi_page_map_init()) return;
   }
   mi_assert(_mi_page_map!=NULL);
   size_t slice_count; size_t sub_idx;
   const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count);
-  return mi_page_map_set_range(page, idx, sub_idx, slice_count);
+  mi_page_map_set_range(page, idx, sub_idx, slice_count);
 }
 
 void _mi_page_map_unregister(mi_page_t* page) {
diff --git a/test/main-override.cpp b/test/main-override.cpp
index f3cb999f..408b5ee8 100644
--- a/test/main-override.cpp
+++ b/test/main-override.cpp
@@ -33,7 +33,6 @@ static void strdup_test(); // issue #445
 static void heap_thread_free_huge();
 static void test_std_string(); // issue #697
 static void test_thread_local(); // issue #944
-static void test_thread_leak(); // issue #1104
 // static void test_mixed0(); // issue #942
 static void test_mixed1(); // issue #942
 static void test_stl_allocators();
@@ -46,12 +45,11 @@ static void test_dep() { };
 #endif
 
 int main() {
-  //mi_stats_reset();  // ignore earlier allocations
+  mi_stats_reset();  // ignore earlier allocations
   //various_tests();
   //test_mixed1();
 
-  // test_dep();
-  test_thread_leak();
+  test_dep();
 
   //test_std_string();
   //test_thread_local();
@@ -70,7 +68,7 @@ int main() {
   test_mt_shutdown();
   */
   //fail_aslr();
-  //mi_stats_print(NULL);
+  mi_stats_print(NULL);
   return 0;
 }
@@ -380,32 +378,6 @@ static void heap_thread_free_huge() {
   }
 }
 
-static std::atomic<long> gsum;
-
-static void local_alloc() {
-  long sum = 0;
-  for(int i = 0; i < 1000000; i++) {
-    const int n = 1 + std::rand() % 1000;
-    uint8_t* p = (uint8_t*)calloc(n, 1);
-    p[0] = 1;
-    sum += p[std::rand() % n];
-    if ((std::rand() % 100) > 24) {
-      free(p);
-    }
-  }
-  gsum += sum;
-}
-
-static void test_thread_leak() {
-  std::vector<std::thread> threads;
-  for (int i=1; i<=100; ++i) {
-    threads.emplace_back(std::thread(&local_alloc));
-  }
-  for (auto& th : threads) {
-    th.join();
-  }
-}
-
 static void test_mt_shutdown() {