Compare commits

...

7 commits

7 changed files with 179 additions and 137 deletions

View file

@ -13,6 +13,9 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MI_BITS_H
#define MI_BITS_H
#include <stddef.h> // size_t
#include <stdint.h> // int64_t etc
#include <stdbool.h> // bool
// ------------------------------------------------------
// Size of a pointer.
@ -90,7 +93,7 @@ typedef int32_t mi_ssize_t;
#endif
#endif
#if (MI_ARCH_X86 || MI_ARCH_X64)
#if MI_ARCH_X64 && defined(__AVX2__)
#include <immintrin.h>
#elif MI_ARCH_ARM64 && MI_OPT_SIMD
#include <arm_neon.h>
@ -120,37 +123,20 @@ typedef int32_t mi_ssize_t;
#define MI_MAX_VABITS (32)
#endif
// use a flat page-map (or a 2-level one)
#ifndef MI_PAGE_MAP_FLAT
#if MI_MAX_VABITS <= 40 && !MI_SECURE && !defined(__APPLE__)
#if MI_MAX_VABITS <= 40 && !defined(__APPLE__)
#define MI_PAGE_MAP_FLAT 1
#else
#define MI_PAGE_MAP_FLAT 0
#endif
#endif
#if MI_PAGE_MAP_FLAT && MI_SECURE
#error should not use MI_PAGE_MAP_FLAT with a secure build
#endif
/* --------------------------------------------------------------------------------
Builtins
-------------------------------------------------------------------------------- */
#if defined(__GNUC__) || defined(__clang__)
#define mi_unlikely(x) (__builtin_expect(!!(x),false))
#define mi_likely(x) (__builtin_expect(!!(x),true))
#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
#define mi_unlikely(x) (x) [[unlikely]]
#define mi_likely(x) (x) [[likely]]
#else
#define mi_unlikely(x) (x)
#define mi_likely(x) (x)
#endif
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
@ -188,28 +174,17 @@ typedef int32_t mi_ssize_t;
-------------------------------------------------------------------------------- */
size_t _mi_popcount_generic(size_t x);
extern bool _mi_cpu_has_popcnt;
static inline size_t mi_popcount(size_t x) {
#if defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
#if !defined(__BMI1__)
if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); }
#endif
size_t r;
__asm ("popcnt\t%1,%0" : "=r"(r) : "r"(x) : "cc");
return r;
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86)
#if !defined(__BMI1__)
if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); }
#endif
return (size_t)mi_msc_builtinz(__popcnt)(x);
#elif defined(_MSC_VER) && MI_ARCH_ARM64
return (size_t)mi_msc_builtinz(__popcnt)(x);
#elif mi_has_builtinz(popcount)
#if mi_has_builtinz(popcount)
return mi_builtinz(popcount)(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
return mi_msc_builtinz(__popcnt)(x);
#elif MI_ARCH_X64 && defined(__BMI1__)
return (size_t)_mm_popcnt_u64(x);
#else
#define MI_HAS_FAST_POPCOUNT 0
return _mi_popcount_generic(x);
return (x<=1 ? x : _mi_popcount_generic(x));
#endif
}
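
A standalone sketch (not part of this change-set): `_mi_popcount_generic` is only declared above. One portable way such a software fallback is commonly written is the classic SWAR reduction; the helper below is illustrative and may differ from mimalloc's actual routine.

// Standalone sketch: a portable population count without hardware support.
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static size_t generic_popcount64(uint64_t x) {
  x = x - ((x >> 1) & 0x5555555555555555ULL);                           // pairwise 2-bit sums
  x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); // 4-bit sums
  x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                           // 8-bit sums
  return (size_t)((x * 0x0101010101010101ULL) >> 56);                   // add all bytes into the top byte
}

int main(void) {
  printf("%zu\n", generic_popcount64(0xFF00FF00FF00FF00ULL)); // prints 32
  return 0;
}
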
@ -223,29 +198,19 @@ size_t _mi_clz_generic(size_t x);
size_t _mi_ctz_generic(size_t x);
static inline size_t mi_ctz(size_t x) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 tzcnt is defined for 0
size_t r;
__asm ("tzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
return r;
#elif defined(__GNUC__) && MI_ARCH_X64
// tzcnt is interpreted as bsf if BMI1 is not supported (pre-haswell)
// if the argument is zero:
// - tzcnt: sets carry-flag, and returns MI_SIZE_BITS
// - bsf : sets zero-flag, and leaves the destination _unmodified_ (on both AMD and Intel now, see <https://github.com/llvm/llvm-project/pull/102885>)
// so we always initialize r to MI_SIZE_BITS to work correctly on all cpu's without branching
size_t r = MI_SIZE_BITS;
__asm ("tzcnt\t%1, %0" : "+r"(r) : "r"(x) : "cc"); // use '+r' to keep the assignment to r in case this becomes bsf on older cpu's
return r;
#elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
return _tzcnt_u64(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long idx;
return (mi_msc_builtinz(_BitScanForward)(&idx, x) ? (size_t)idx : MI_SIZE_BITS);
#elif mi_has_builtinz(ctz)
return (x!=0 ? (size_t)mi_builtinz(ctz)(x) : MI_SIZE_BITS);
#elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
return (x!=0 ? _tzcnt_u64(x) : MI_SIZE_BITS); // ensure it still works on non-BMI1 cpu's as well
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
if (x==0) return MI_SIZE_BITS; // test explicitly for `x==0` to avoid codegen bug (issue #1071)
unsigned long idx; mi_msc_builtinz(_BitScanForward)(&idx, x);
return (size_t)idx;
#elif defined(__GNUC__) && MI_ARCH_X86
size_t r = MI_SIZE_BITS;
#elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
size_t r = MI_SIZE_BITS; // bsf leaves destination unmodified if the argument is 0 (see <https://github.com/llvm/llvm-project/pull/102885>)
__asm ("bsf\t%1, %0" : "+r"(r) : "r"(x) : "cc");
return r;
#elif MI_HAS_FAST_POPCOUNT
@ -257,18 +222,17 @@ static inline size_t mi_ctz(size_t x) {
}
static inline size_t mi_clz(size_t x) {
// we don't optimize anymore to lzcnt as there are still non BMI1 cpu's around (like Intel Celeron, see issue #1016)
// on pre-haswell cpu's lzcnt gets executed as bsr which is not equivalent (as it returns the bit position)
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 lzcnt is defined for 0
size_t r;
__asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
return r;
#elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
return _lzcnt_u64(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long idx;
return (mi_msc_builtinz(_BitScanReverse)(&idx, x) ? MI_SIZE_BITS - 1 - (size_t)idx : MI_SIZE_BITS);
#elif mi_has_builtinz(clz)
return (x!=0 ? (size_t)mi_builtinz(clz)(x) : MI_SIZE_BITS);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
if (x==0) return MI_SIZE_BITS; // test explicitly for `x==0` to avoid codegen bug (issue #1071)
unsigned long idx; mi_msc_builtinz(_BitScanReverse)(&idx, x);
return (MI_SIZE_BITS - 1 - (size_t)idx);
#elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
if (x==0) return MI_SIZE_BITS;
size_t r;
@ -297,11 +261,9 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
bool is_zero;
__asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" );
return !is_zero;
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
if (x==0) return false; // test explicitly for `x==0` to avoid codegen bug (issue #1071)
unsigned long i; mi_msc_builtinz(_BitScanForward)(&i, x);
*idx = (size_t)i;
return true;
#elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long i;
return (mi_msc_builtinz(_BitScanForward)(&i, x) ? (*idx = (size_t)i, true) : false);
#else
return (x!=0 ? (*idx = mi_ctz(x), true) : false);
#endif
@ -311,11 +273,14 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bsr(size_t x, size_t* idx) {
#if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
if (x==0) return false; // test explicitly for `x==0` to avoid codegen bug (issue #1071)
unsigned long i; mi_msc_builtinz(_BitScanReverse)(&i, x);
*idx = (size_t)i;
return true;
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9)
// on x64 the carry flag is set on zero which gives better codegen
bool is_zero;
__asm ("lzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc");
return !is_zero;
#elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long i;
return (mi_msc_builtinz(_BitScanReverse)(&i, x) ? (*idx = (size_t)i, true) : false);
#else
return (x!=0 ? (*idx = MI_SIZE_BITS - 1 - mi_clz(x), true) : false);
#endif
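
For reference, separate from the diff itself, the `mi_bsf`/`mi_bsr` contract used above (return false for a zero argument, otherwise report the lowest or highest set bit in `*idx`) can be demonstrated with a small portable sketch built only on generic ctz/clz loops; all names below are illustrative.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define SKETCH_SIZE_BITS (8 * sizeof(size_t))

static size_t sketch_ctz(size_t x) {      // generic count-trailing-zeros
  if (x == 0) return SKETCH_SIZE_BITS;
  size_t n = 0;
  while ((x & 1) == 0) { x >>= 1; n++; }
  return n;
}

static size_t sketch_clz(size_t x) {      // generic count-leading-zeros
  if (x == 0) return SKETCH_SIZE_BITS;
  size_t n = 0;
  while ((x & ((size_t)1 << (SKETCH_SIZE_BITS - 1))) == 0) { x <<= 1; n++; }
  return n;
}

static bool sketch_bsf(size_t x, size_t* idx) {   // lowest set bit; false if x==0
  return (x != 0 ? (*idx = sketch_ctz(x), true) : false);
}

static bool sketch_bsr(size_t x, size_t* idx) {   // highest set bit; false if x==0
  return (x != 0 ? (*idx = SKETCH_SIZE_BITS - 1 - sketch_clz(x), true) : false);
}

int main(void) {
  size_t idx;
  if (sketch_bsf(0x28, &idx)) printf("lowest set bit of 0x28: %zu\n", idx);   // 3
  if (sketch_bsr(0x28, &idx)) printf("highest set bit of 0x28: %zu\n", idx);  // 5
  if (!sketch_bsf(0, &idx))   printf("bsf(0) reports false\n");
  return 0;
}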

View file

@ -223,7 +223,7 @@ void* _mi_arenas_alloc_aligned(mi_subproc_t* subproc, size_t size, size_
void _mi_arenas_free(void* p, size_t size, mi_memid_t memid);
bool _mi_arenas_contain(const void* p);
void _mi_arenas_collect(bool force_purge, bool visit_all, mi_tld_t* tld);
void _mi_arenas_unsafe_destroy_all(mi_tld_t* tld);
void _mi_arenas_unsafe_destroy_all(mi_subproc_t* subproc);
mi_page_t* _mi_arenas_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment);
void _mi_arenas_page_free(mi_page_t* page, mi_tld_t* tld);
@ -238,7 +238,7 @@ bool _mi_meta_is_meta_page(void* p);
// "page-map.c"
bool _mi_page_map_init(void);
void _mi_page_map_register(mi_page_t* page);
mi_decl_nodiscard bool _mi_page_map_register(mi_page_t* page);
void _mi_page_map_unregister(mi_page_t* page);
void _mi_page_map_unregister_range(void* start, size_t size);
mi_page_t* _mi_safe_ptr_page(const void* p);
@ -604,7 +604,8 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) {
#define MI_PAGE_MAP_SHIFT (MI_MAX_VABITS - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT)
#define MI_PAGE_MAP_COUNT (MI_ZU(1) << MI_PAGE_MAP_SHIFT)
extern mi_decl_hidden _Atomic(mi_page_t**)* _mi_page_map;
typedef mi_page_t** mi_submap_t;
extern mi_decl_hidden _Atomic(mi_submap_t)* _mi_page_map;
static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) {
const size_t u = (size_t)((uintptr_t)p / MI_ARENA_SLICE_SIZE);
@ -612,7 +613,7 @@ static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) {
return (u / MI_PAGE_MAP_SUB_COUNT);
}
static inline mi_page_t** _mi_page_map_at(size_t idx) {
static inline mi_submap_t _mi_page_map_at(size_t idx) {
return mi_atomic_load_ptr_relaxed(mi_page_t*, &_mi_page_map[idx]);
}
@ -625,7 +626,7 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) {
static inline mi_page_t* _mi_checked_ptr_page(const void* p) {
size_t sub_idx;
const size_t idx = _mi_page_map_index(p, &sub_idx);
mi_page_t** const sub = _mi_page_map_at(idx);
mi_submap_t const sub = _mi_page_map_at(idx);
if mi_unlikely(sub == NULL) return NULL;
return sub[sub_idx];
}
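
To illustrate the two-level lookup above (separate from the diff itself): the top-level index selects a sub-map, the sub-index selects the page entry, and a missing sub-map means the address was never registered. The sketch below is single-threaded, uses illustrative constants, and omits the atomics of the real map.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define SLICE_SIZE (64*1024)          // assumed slice granularity
#define SUB_COUNT  (4096)             // assumed entries per sub-map
typedef struct page_s { int dummy; } page_t;
typedef page_t** submap_t;

static submap_t page_map[1 << 14];    // top level (size illustrative)

static size_t page_map_index(const void* p, size_t* sub_idx) {
  const size_t u = (size_t)((uintptr_t)p / SLICE_SIZE);
  *sub_idx = u % SUB_COUNT;
  return u / SUB_COUNT;
}

static page_t* checked_ptr_page(const void* p) {
  size_t sub_idx;
  const size_t idx = page_map_index(p, &sub_idx);
  submap_t const sub = page_map[idx];
  if (sub == NULL) return NULL;       // address range never registered
  return sub[sub_idx];
}

int main(void) {
  static page_t pg;
  static page_t* entries[SUB_COUNT];            // one sub-map, zero-initialized
  size_t sub_idx;
  void* p = (void*)(uintptr_t)(5 * SLICE_SIZE); // a made-up address
  const size_t idx = page_map_index(p, &sub_idx);
  page_map[idx] = entries;                      // install the sub-map
  entries[sub_idx] = &pg;                       // register the "page"
  printf("resolved: %s\n", (checked_ptr_page(p) == &pg) ? "ok" : "miss");
  return 0;
}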

View file

@ -22,6 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include <mimalloc-stats.h>
#include <stddef.h> // ptrdiff_t
#include <stdint.h> // uintptr_t, uint16_t, etc
#include <stdbool.h> // bool
#include <limits.h> // SIZE_MAX etc.
#include <errno.h> // error codes
#include "bits.h" // size defines (MI_INTPTR_SIZE etc), bit operations

View file

@ -684,7 +684,7 @@ static mi_page_t* mi_arenas_page_alloc_fresh(size_t slice_count, size_t block_si
commit_size = _mi_align_up(block_start + block_size, MI_PAGE_MIN_COMMIT_SIZE);
if (commit_size > page_noguard_size) { commit_size = page_noguard_size; }
bool is_zero;
if (!mi_arena_commit( mi_memid_arena(memid), page, commit_size, &is_zero, 0)) {
if mi_unlikely(!mi_arena_commit( mi_memid_arena(memid), page, commit_size, &is_zero, 0)) {
_mi_arenas_free(page, alloc_size, memid);
return NULL;
}
@ -710,7 +710,10 @@ static mi_page_t* mi_arenas_page_alloc_fresh(size_t slice_count, size_t block_si
mi_page_try_claim_ownership(page);
// register in the page map
_mi_page_map_register(page);
if mi_unlikely(!_mi_page_map_register(page)) {
_mi_arenas_free( page, alloc_size, memid );
return NULL;
}
// stats
mi_tld_stat_increase(tld, pages, 1);
@ -990,7 +993,7 @@ void _mi_arenas_page_unabandon(mi_page_t* page) {
Arena free
----------------------------------------------------------- */
static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slices);
static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld);
static void mi_arenas_try_purge(bool force, bool visit_all, mi_subproc_t* subproc, size_t tseq);
void _mi_arenas_free(void* p, size_t size, mi_memid_t memid) {
if (p==NULL) return;
@ -1051,7 +1054,7 @@ void _mi_arenas_free(void* p, size_t size, mi_memid_t memid) {
// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired
void _mi_arenas_collect(bool force_purge, bool visit_all, mi_tld_t* tld) {
mi_arenas_try_purge(force_purge, visit_all, tld);
mi_arenas_try_purge(force_purge, visit_all, tld->subproc, tld->thread_seq);
}
@ -1085,9 +1088,8 @@ bool _mi_arenas_contain(const void* p) {
// for dynamic libraries that are unloaded and need to release all their allocated memory.
static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) {
mi_assert_internal(subproc != NULL);
const size_t max_arena = mi_arenas_get_count(subproc);
size_t new_max_arena = 0;
for (size_t i = 0; i < max_arena; i++) {
const size_t arena_count = mi_arenas_get_count(subproc);
for (size_t i = 0; i < arena_count; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]);
if (arena != NULL) {
// mi_lock_done(&arena->abandoned_visit_lock);
@ -1097,18 +1099,17 @@ static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) {
}
}
}
// try to lower the max arena.
size_t expected = max_arena;
mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, new_max_arena);
size_t expected = arena_count;
mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, 0);
}
// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
// for dynamic libraries that are unloaded and need to release all their allocated memory.
void _mi_arenas_unsafe_destroy_all(mi_tld_t* tld) {
mi_arenas_unsafe_destroy(tld->subproc);
_mi_arenas_collect(true /* force purge */, true /* visit all*/, tld); // purge non-owned arenas
void _mi_arenas_unsafe_destroy_all(mi_subproc_t* subproc) {
mi_arenas_unsafe_destroy(subproc);
// mi_arenas_try_purge(true /* force purge */, true /* visit all*/, subproc, 0 /* thread seq */); // purge non-owned arenas
}
@ -1772,14 +1773,13 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force)
}
static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld)
static void mi_arenas_try_purge(bool force, bool visit_all, mi_subproc_t* subproc, size_t tseq)
{
// try purge can be called often so try to only run when needed
const long delay = mi_arena_purge_delay();
if (_mi_preloading() || delay <= 0) return; // nothing will be scheduled
// check if any arena needs purging?
mi_subproc_t* subproc = tld->subproc;
const mi_msecs_t now = _mi_clock_now();
const mi_msecs_t arenas_expire = mi_atomic_loadi64_acquire(&subproc->purge_expire);
if (!visit_all && !force && (arenas_expire == 0 || arenas_expire > now)) return;
@ -1793,7 +1793,7 @@ static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld)
{
// increase global expire: at most one purge per delay cycle
if (arenas_expire > now) { mi_atomic_storei64_release(&subproc->purge_expire, now + (delay/10)); }
const size_t arena_start = tld->thread_seq % max_arena;
const size_t arena_start = tseq % max_arena;
size_t max_purge_count = (visit_all ? max_arena : (max_arena/4)+1);
bool all_visited = true;
bool any_purged = false;
@ -1894,12 +1894,12 @@ static bool mi_arena_page_register(size_t slice_index, size_t slice_count, mi_ar
mi_assert_internal(slice_count == 1);
mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
mi_assert_internal(mi_bitmap_is_setN(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index, 1));
_mi_page_map_register(page);
if (!_mi_page_map_register(page)) return false; // break
mi_assert_internal(_mi_ptr_page(page)==page);
return true;
}
static bool mi_arena_pages_reregister(mi_arena_t* arena) {
mi_decl_nodiscard static bool mi_arena_pages_reregister(mi_arena_t* arena) {
return _mi_bitmap_forall_set(arena->pages, &mi_arena_page_register, arena, NULL);
}
@ -1979,7 +1979,10 @@ mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_commit_fun_t* c
if (!mi_arenas_add(arena->subproc, arena, arena_id)) {
return false;
}
mi_arena_pages_reregister(arena);
if (!mi_arena_pages_reregister(arena)) {
// todo: clear arena entry in the subproc?
return false;
}
// adjust abandoned page count
for (size_t bin = 0; bin < MI_BIN_COUNT; bin++) {
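
The error paths added in this file follow one pattern: once commit or page-map registration fails, the fresh allocation is handed back and NULL is propagated to the caller. The schematic sketch below (not mimalloc's actual routines; the primitives are stand-ins) shows that control flow.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// Stand-ins for the arena primitives; each may fail.
static bool commit_memory(void* p)        { (void)p; return true;  }
static bool register_in_page_map(void* p) { (void)p; return false; } // simulate a failure

static void* page_alloc_fresh(size_t size) {
  void* page = malloc(size);
  if (page == NULL) return NULL;
  if (!commit_memory(page))        { free(page); return NULL; } // undo the allocation
  if (!register_in_page_map(page)) { free(page); return NULL; } // undo the allocation
  return page;                                                  // fully set up
}

int main(void) {
  void* p = page_alloc_fresh(4096);
  printf("allocation %s\n", (p == NULL) ? "failed and was cleaned up" : "succeeded");
  free(p);
  return 0;
}
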

View file

@ -711,7 +711,7 @@ void mi_process_init(void) mi_attr_noexcept {
mi_detect_cpu_features();
_mi_stats_init();
_mi_os_init();
_mi_page_map_init();
_mi_page_map_init(); // this could fail.. should we abort in that case?
mi_heap_main_init();
mi_tld_main_init();
// the following two can potentially allocate (on freeBSD for locks and thread keys)
@ -778,7 +778,7 @@ void mi_cdecl mi_process_done(void) mi_attr_noexcept {
if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
mi_heap_collect(heap, true /* force */);
_mi_heap_unsafe_destroy_all(heap); // forcefully release all memory held by all heaps (of this thread only!)
_mi_arenas_unsafe_destroy_all(heap->tld);
_mi_arenas_unsafe_destroy_all(_mi_subproc_main());
_mi_page_map_unsafe_destroy(_mi_subproc_main());
}
//_mi_page_map_unsafe_destroy(_mi_subproc_main());

View file

@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include "bitmap.h"
static void mi_page_map_cannot_commit(void) {
_mi_error_message(EFAULT,"unable to commit memory for the page address map\n");
_mi_warning_message("unable to commit the allocation page-map on-demand\n" );
}
#if MI_PAGE_MAP_FLAT
@ -32,7 +32,7 @@ static mi_memid_t mi_page_map_memid;
#define MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT MI_ARENA_SLICE_SIZE
static mi_bitmap_t* mi_page_map_commit; // one bit per committed 64 KiB entries
static void mi_page_map_ensure_committed(size_t idx, size_t slice_count);
mi_decl_nodiscard static bool mi_page_map_ensure_committed(size_t idx, size_t slice_count);
bool _mi_page_map_init(void) {
size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS);
@ -71,7 +71,10 @@ bool _mi_page_map_init(void) {
// commit the first part so NULL pointers get resolved without an access violation
if (!commit) {
mi_page_map_ensure_committed(0, 1);
if (!mi_page_map_ensure_committed(0, 1)) {
mi_page_map_cannot_commit();
return false;
}
}
_mi_page_map[0] = 1; // so _mi_ptr_page(NULL) == NULL
mi_assert_internal(_mi_ptr_page(NULL)==NULL);
@ -90,7 +93,7 @@ void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) {
}
static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
static bool mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
// is the page map area that contains the page address committed?
// we always set the commit bits so we can track what ranges are in-use.
// we only actually commit if the map wasn't committed fully already.
@ -103,7 +106,10 @@ static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
bool is_zero;
uint8_t* const start = _mi_page_map + (i * MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT);
const size_t size = MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT;
if (!_mi_os_commit(start, size, &is_zero)) return;
if (!_mi_os_commit(start, size, &is_zero)) {
mi_page_map_cannot_commit();
return false;
}
if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start, size); }
mi_bitmap_set(mi_page_map_commit, i);
}
@ -113,6 +119,7 @@ static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
_mi_page_map[idx] = 0;
_mi_page_map[idx+slice_count-1] = 0;
#endif
return true;
}
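
The commit tracking above boils down to one bit per fixed-size chunk of the map: a chunk is committed at most once, and a failed OS commit is now reported to the caller. The standalone sketch below (separate from the diff) uses a plain bitmask and a stand-in for the OS commit call; constants are illustrative.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define ENTRIES_PER_COMMIT_BIT 65536   // illustrative: one bit covers this many map entries

static uint64_t commit_bits = 0;       // stand-in for the commit bitmap
static int os_commit_calls = 0;

static bool os_commit(size_t chunk) {  // stand-in for the real OS commit call
  (void)chunk; os_commit_calls++; return true;
}

static bool ensure_committed(size_t idx, size_t count) {
  const size_t lo = idx / ENTRIES_PER_COMMIT_BIT;
  const size_t hi = (idx + count - 1) / ENTRIES_PER_COMMIT_BIT;
  for (size_t i = lo; i <= hi; i++) {
    if ((commit_bits & ((uint64_t)1 << i)) == 0) { // not committed yet?
      if (!os_commit(i)) return false;             // propagate the failure
      commit_bits |= ((uint64_t)1 << i);
    }
  }
  return true;
}

int main(void) {
  ensure_committed(10, 100);      // commits chunk 0
  ensure_committed(20, 100);      // chunk 0 already committed: no OS call
  ensure_committed(70000, 10);    // commits chunk 1
  printf("OS commit calls: %d\n", os_commit_calls);  // prints 2
  return 0;
}
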
@ -124,25 +131,28 @@ static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t*
return _mi_page_map_index(page);
}
void _mi_page_map_register(mi_page_t* page) {
bool _mi_page_map_register(mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access!
if mi_unlikely(_mi_page_map == NULL) {
if (!_mi_page_map_init()) return;
if (!_mi_page_map_init()) return false;
}
mi_assert(_mi_page_map!=NULL);
uint8_t* page_start;
size_t slice_count;
const size_t idx = mi_page_map_get_idx(page, &page_start, &slice_count);
mi_page_map_ensure_committed(idx, slice_count);
if (!mi_page_map_ensure_committed(idx, slice_count)) {
return false;
}
// set the offsets
for (size_t i = 0; i < slice_count; i++) {
mi_assert_internal(i < 128);
_mi_page_map[idx + i] = (uint8_t)(i+1);
}
return true;
}
void _mi_page_map_unregister(mi_page_t* page) {
@ -158,7 +168,10 @@ void _mi_page_map_unregister(mi_page_t* page) {
void _mi_page_map_unregister_range(void* start, size_t size) {
const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE);
const uintptr_t index = _mi_page_map_index(start);
mi_page_map_ensure_committed(index, slice_count); // we commit the range in total; todo: scan the commit bits and clear only those ranges?
// todo: scan the commit bits and clear only those ranges?
if (!mi_page_map_ensure_committed(index, slice_count)) { // we commit the range in total;
return;
}
_mi_memzero(&_mi_page_map[index], slice_count);
}
@ -179,9 +192,10 @@ mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_att
#else
// A 2-level page map
#define MI_PAGE_MAP_SUB_SIZE (MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*))
#define MI_PAGE_MAP_ENTRIES_PER_CBIT (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS)
mi_decl_cache_align _Atomic(mi_page_t**)* _mi_page_map;
mi_decl_cache_align _Atomic(mi_submap_t)* _mi_page_map;
static size_t mi_page_map_count;
static void* mi_page_map_max_address;
static mi_memid_t mi_page_map_memid;
@ -189,7 +203,9 @@ static mi_memid_t mi_page_map_memid;
// divide the main map in 64 (`MI_BFIELD_BITS`) parts and commit those parts on demand
static _Atomic(mi_bfield_t) mi_page_map_commit;
#define MI_PAGE_MAP_ENTRIES_PER_CBIT (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS)
mi_decl_nodiscard static bool mi_page_map_ensure_committed(size_t idx, mi_submap_t* submap);
mi_decl_nodiscard static bool mi_page_map_ensure_submap_at(size_t idx, mi_submap_t* submap);
static bool mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count);
static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) {
mi_bfield_t commit = mi_atomic_load_relaxed(&mi_page_map_commit);
@ -199,16 +215,19 @@ static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) {
return ((commit & (MI_ZU(1) << bit_idx)) != 0);
}
static mi_page_t** mi_page_map_ensure_committed(size_t idx) {
static bool mi_page_map_ensure_committed(size_t idx, mi_submap_t* submap) {
mi_assert_internal(submap!=NULL && *submap==NULL);
size_t bit_idx;
if mi_unlikely(!mi_page_map_is_committed(idx, &bit_idx)) {
uint8_t* start = (uint8_t*)&_mi_page_map[bit_idx * MI_PAGE_MAP_ENTRIES_PER_CBIT];
if (!_mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_page_t**), NULL)) {
return NULL;
if (!_mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_submap_t), NULL)) {
mi_page_map_cannot_commit();
return false;
}
mi_atomic_or_acq_rel(&mi_page_map_commit, MI_ZU(1) << bit_idx);
}
return mi_atomic_load_ptr_acquire(mi_page_t*, &_mi_page_map[idx]); // _mi_page_map_at(idx);
*submap = mi_atomic_load_ptr_acquire(mi_page_t*, &_mi_page_map[idx]); // acquire _mi_page_map_at(idx);
return true;
}
// initialize the page map
@ -258,7 +277,11 @@ bool _mi_page_map_init(void) {
if (!mi_page_map_memid.initially_zero) { // initialize low addresses with NULL
_mi_memzero_aligned(sub0, submap_size);
}
mi_page_map_ensure_committed(0);
mi_submap_t nullsub = NULL;
if (!mi_page_map_ensure_committed(0,&nullsub)) {
mi_page_map_cannot_commit();
return false;
}
mi_atomic_store_ptr_release(mi_page_t*, &_mi_page_map[0], sub0);
mi_assert_internal(_mi_ptr_page(NULL)==NULL);
@ -273,7 +296,7 @@ void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) {
for (size_t idx = 1; idx < mi_page_map_count; idx++) { // skip entry 0 (as we allocate that submap at the end of the page_map)
// free all sub-maps
if (mi_page_map_is_committed(idx, NULL)) {
mi_page_t** sub = _mi_page_map_at(idx);
mi_submap_t sub = _mi_page_map_at(idx);
if (sub != NULL) {
mi_memid_t memid = _mi_memid_create_os(sub, MI_PAGE_MAP_SUB_SIZE, true, false, false);
_mi_os_free_ex(memid.mem.os.base, memid.mem.os.size, true, memid, subproc);
@ -290,41 +313,62 @@ void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) {
}
static mi_page_t** mi_page_map_ensure_submap_at(size_t idx) {
mi_page_t** sub = mi_page_map_ensure_committed(idx);
static bool mi_page_map_ensure_submap_at(size_t idx, mi_submap_t* submap) {
mi_assert_internal(submap!=NULL && *submap==NULL);
mi_submap_t sub = NULL;
if (!mi_page_map_ensure_committed(idx, &sub)) {
return false;
}
if mi_unlikely(sub == NULL) {
// sub map not yet allocated, alloc now
mi_memid_t memid;
mi_page_t** expect = sub;
const size_t submap_size = MI_PAGE_MAP_SUB_SIZE;
sub = (mi_page_t**)_mi_os_zalloc(submap_size, &memid);
if (sub == NULL) {
_mi_error_message(EFAULT, "internal error: unable to extend the page map\n");
return NULL;
sub = (mi_submap_t)_mi_os_zalloc(submap_size, &memid);
if (sub==NULL) {
_mi_warning_message("internal error: unable to extend the page map\n");
return false;
}
if (!mi_atomic_cas_ptr_strong_acq_rel(mi_page_t*, &_mi_page_map[idx], &expect, sub)) {
// another thread already allocated it.. free and continue
_mi_os_free(sub, submap_size, memid);
sub = expect;
mi_assert_internal(sub!=NULL);
}
}
return sub;
mi_assert_internal(sub!=NULL);
*submap = sub;
return true;
}
static void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
static bool mi_page_map_set_range_prim(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
// is the page map area that contains the page address committed?
while (slice_count > 0) {
mi_page_t** sub = mi_page_map_ensure_submap_at(idx);
mi_submap_t sub = NULL;
if (!mi_page_map_ensure_submap_at(idx, &sub)) {
return false;
};
mi_assert_internal(sub!=NULL);
// set the offsets for the page
while (sub_idx < MI_PAGE_MAP_SUB_COUNT) {
while (slice_count > 0 && sub_idx < MI_PAGE_MAP_SUB_COUNT) {
sub[sub_idx] = page;
slice_count--; if (slice_count == 0) return;
slice_count--;
sub_idx++;
}
idx++; // potentially wrap around to the next idx
sub_idx = 0;
}
return true;
}
static bool mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
if mi_unlikely(!mi_page_map_set_range_prim(page,idx,sub_idx,slice_count)) {
// failed to commit, call again to reset the page pointer if needed
if (page!=NULL) {
mi_page_map_set_range_prim(NULL,idx,sub_idx,slice_count);
}
return false;
}
return true;
}
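
As a standalone sketch (not part of this change-set): `mi_page_map_set_range` above re-runs the primitive with NULL when a partial write fails, so the map never keeps an entry for a page that was not fully registered. The code below shows that reset-on-failure idea with illustrative sizes and a simulated commit failure.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define TOP_COUNT 4
#define SUB_COUNT 8
typedef struct page_s { int dummy; } page_t;

static page_t* submaps[TOP_COUNT][SUB_COUNT];
static bool submap_available[TOP_COUNT] = { true, true, false, false }; // simulate a commit failure at idx 2

static bool set_range_prim(page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
  while (slice_count > 0) {
    if (idx >= TOP_COUNT || !submap_available[idx]) return false; // cannot get this sub-map
    while (slice_count > 0 && sub_idx < SUB_COUNT) {
      submaps[idx][sub_idx] = page;
      slice_count--; sub_idx++;
    }
    idx++; sub_idx = 0;                                           // continue in the next sub-map
  }
  return true;
}

static bool set_range(page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
  if (!set_range_prim(page, idx, sub_idx, slice_count)) {
    if (page != NULL) set_range_prim(NULL, idx, sub_idx, slice_count); // clear what was written
    return false;
  }
  return true;
}

int main(void) {
  page_t pg;
  const bool ok = set_range(&pg, 1, 6, 20);  // spans sub-maps 1..3; sub-map 2 "fails"
  printf("set_range: %s, submaps[1][6]=%p\n", ok ? "ok" : "rolled back", (void*)submaps[1][6]);
  return 0;
}
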
static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slice_count) {
@ -335,18 +379,18 @@ static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slic
return _mi_page_map_index(page, sub_idx);
}
void _mi_page_map_register(mi_page_t* page) {
bool _mi_page_map_register(mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access!
if mi_unlikely(_mi_page_map == NULL) {
if (!_mi_page_map_init()) return;
if (!_mi_page_map_init()) return false;
}
mi_assert(_mi_page_map!=NULL);
size_t slice_count;
size_t sub_idx;
const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count);
mi_page_map_set_range(page, idx, sub_idx, slice_count);
return mi_page_map_set_range(page, idx, sub_idx, slice_count);
}
void _mi_page_map_unregister(mi_page_t* page) {
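
Separate from the diff itself: `mi_page_map_ensure_submap_at` above publishes a freshly allocated sub-map with a compare-and-swap and, when another thread wins the race, frees its own copy and adopts the published one. The C11 sketch below shows that pattern with plain calloc/free in place of the OS allocation; names and sizes are illustrative.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct page_s { int dummy; } page_t;
typedef page_t** submap_t;

#define SUB_COUNT 4096

static _Atomic(submap_t) top_level[16];   // top-level map (size illustrative)

static submap_t ensure_submap_at(size_t idx) {
  submap_t sub = atomic_load_explicit(&top_level[idx], memory_order_acquire);
  if (sub == NULL) {
    submap_t fresh = (submap_t)calloc(SUB_COUNT, sizeof(page_t*));
    if (fresh == NULL) return NULL;       // propagate the failure
    submap_t expected = NULL;
    if (atomic_compare_exchange_strong_explicit(&top_level[idx], &expected, fresh,
                                                memory_order_acq_rel, memory_order_acquire)) {
      sub = fresh;                        // we published our sub-map
    }
    else {
      free(fresh);                        // another thread won the race
      sub = expected;                     // adopt the published sub-map
    }
  }
  return sub;
}

int main(void) {
  submap_t s1 = ensure_submap_at(3);
  submap_t s2 = ensure_submap_at(3);
  printf("same sub-map: %s\n", (s1 != NULL && s1 == s2) ? "yes" : "no");
  return 0;
}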

View file

@ -33,6 +33,7 @@ static void strdup_test(); // issue #445
static void heap_thread_free_huge();
static void test_std_string(); // issue #697
static void test_thread_local(); // issue #944
static void test_thread_leak(); // issue #1104
// static void test_mixed0(); // issue #942
static void test_mixed1(); // issue #942
static void test_stl_allocators();
@ -45,11 +46,12 @@ static void test_dep() { };
#endif
int main() {
mi_stats_reset(); // ignore earlier allocations
//mi_stats_reset(); // ignore earlier allocations
//various_tests();
//test_mixed1();
test_dep();
// test_dep();
test_thread_leak();
//test_std_string();
//test_thread_local();
@ -68,7 +70,7 @@ int main() {
test_mt_shutdown();
*/
//fail_aslr();
mi_stats_print(NULL);
//mi_stats_print(NULL);
return 0;
}
@ -378,6 +380,32 @@ static void heap_thread_free_huge() {
}
}
static std::atomic<long> gsum;
static void local_alloc() {
long sum = 0;
for(int i = 0; i < 1000000; i++) {
const int n = 1 + std::rand() % 1000;
uint8_t* p = (uint8_t*)calloc(n, 1);
p[0] = 1;
sum += p[std::rand() % n];
if ((std::rand() % 100) > 24) {
free(p);
}
}
gsum += sum;
}
static void test_thread_leak() {
std::vector<std::thread> threads;
for (int i=1; i<=100; ++i) {
threads.emplace_back(std::thread(&local_alloc));
}
for (auto& th : threads) {
th.join();
}
}
static void test_mt_shutdown()
{