mirror of https://github.com/microsoft/mimalloc.git
synced 2025-08-23 15:54:47 +03:00

Compare commits: 7 commits, dc84ea7866 ... c1f17cd253

c1f17cd253
1b5ee4e2d0
aaf8da9aba
99976d6c2b
2ce6568af6
d8321f6d66
878627072b

7 changed files with 179 additions and 137 deletions
@ -13,6 +13,9 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MI_BITS_H
#define MI_BITS_H

#include <stddef.h> // size_t
#include <stdint.h> // int64_t etc
#include <stdbool.h> // bool

// ------------------------------------------------------
// Size of a pointer.
@ -90,7 +93,7 @@ typedef int32_t mi_ssize_t;
#endif
#endif

#if (MI_ARCH_X86 || MI_ARCH_X64)
#if MI_ARCH_X64 && defined(__AVX2__)
#include <immintrin.h>
#elif MI_ARCH_ARM64 && MI_OPT_SIMD
#include <arm_neon.h>
@ -120,37 +123,20 @@ typedef int32_t mi_ssize_t;
#define MI_MAX_VABITS (32)
#endif

// use a flat page-map (or a 2-level one)
#ifndef MI_PAGE_MAP_FLAT
#if MI_MAX_VABITS <= 40 && !MI_SECURE && !defined(__APPLE__)
#if MI_MAX_VABITS <= 40 && !defined(__APPLE__)
#define MI_PAGE_MAP_FLAT 1
#else
#define MI_PAGE_MAP_FLAT 0
#endif
#endif

#if MI_PAGE_MAP_FLAT && MI_SECURE
#error should not use MI_PAGE_MAP_FLAT with a secure build
#endif
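Note (not part of this diff): the flat page map keeps one uint8_t entry per arena slice over the whole virtual address range, which is why it is only enabled when MI_MAX_VABITS <= 40. A rough size estimate, assuming a 64 KiB arena slice (taken from the "64 KiB entries" comment in page-map.c, not stated in this hunk):

#include <stdio.h>

int main(void) {
  // hypothetical numbers for illustration only
  const unsigned long long vabits     = 40;          // MI_MAX_VABITS limit for the flat map
  const unsigned long long slice_size = 64 * 1024;   // assumed MI_ARENA_SLICE_SIZE (64 KiB)
  const unsigned long long entries    = (1ULL << vabits) / slice_size;  // one byte per slice
  printf("flat page map: %llu entries (%llu MiB reserved)\n",
         entries, entries / (1024 * 1024));          // prints 16777216 entries (16 MiB)
  return 0;
}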
/* --------------------------------------------------------------------------------
Builtin's
-------------------------------------------------------------------------------- */

#if defined(__GNUC__) || defined(__clang__)
#define mi_unlikely(x) (__builtin_expect(!!(x),false))
#define mi_likely(x) (__builtin_expect(!!(x),true))
#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
#define mi_unlikely(x) (x) [[unlikely]]
#define mi_likely(x) (x) [[likely]]
#else
#define mi_unlikely(x) (x)
#define mi_likely(x) (x)
#endif

#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
@ -188,28 +174,17 @@ typedef int32_t mi_ssize_t;
-------------------------------------------------------------------------------- */

size_t _mi_popcount_generic(size_t x);
extern bool _mi_cpu_has_popcnt;

static inline size_t mi_popcount(size_t x) {
#if defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
#if !defined(__BMI1__)
if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); }
#endif
size_t r;
__asm ("popcnt\t%1,%0" : "=r"(r) : "r"(x) : "cc");
return r;
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86)
#if !defined(__BMI1__)
if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); }
#endif
return (size_t)mi_msc_builtinz(__popcnt)(x);
#elif defined(_MSC_VER) && MI_ARCH_ARM64
return (size_t)mi_msc_builtinz(__popcnt)(x);
#elif mi_has_builtinz(popcount)
#if mi_has_builtinz(popcount)
return mi_builtinz(popcount)(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
return mi_msc_builtinz(__popcnt)(x);
#elif MI_ARCH_X64 && defined(__BMI1__)
return (size_t)_mm_popcnt_u64(x);
#else
#define MI_HAS_FAST_POPCOUNT 0
return _mi_popcount_generic(x);
return (x<=1 ? x : _mi_popcount_generic(x));
#endif
}
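The slow path above defers to _mi_popcount_generic, whose definition is not part of this diff. For illustration, a portable fallback typically uses the classic SWAR bit-counting reduction; a minimal sketch (not mimalloc's actual code):

#include <stddef.h>
#include <stdint.h>

// Branch-free 64-bit population count (the classic SWAR reduction).
static size_t popcount_generic64(uint64_t x) {
  x = x - ((x >> 1) & 0x5555555555555555ULL);                             // 2-bit sums
  x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);   // 4-bit sums
  x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                             // per-byte sums
  return (size_t)((x * 0x0101010101010101ULL) >> 56);                     // add all bytes
}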
@ -223,29 +198,19 @@ size_t _mi_clz_generic(size_t x);
size_t _mi_ctz_generic(size_t x);

static inline size_t mi_ctz(size_t x) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 tzcnt is defined for 0
size_t r;
__asm ("tzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
return r;
#elif defined(__GNUC__) && MI_ARCH_X64
// tzcnt is interpreted as bsf if BMI1 is not supported (pre-haswell)
// if the argument is zero:
// - tzcnt: sets carry-flag, and returns MI_SIZE_BITS
// - bsf : sets zero-flag, and leaves the destination _unmodified_ (on both AMD and Intel now, see <https://github.com/llvm/llvm-project/pull/102885>)
// so we always initialize r to MI_SIZE_BITS to work correctly on all cpu's without branching
size_t r = MI_SIZE_BITS;
__asm ("tzcnt\t%1, %0" : "+r"(r) : "r"(x) : "cc"); // use '+r' to keep the assignment to r in case this becomes bsf on older cpu's
return r;
#elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
return _tzcnt_u64(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long idx;
return (mi_msc_builtinz(_BitScanForward)(&idx, x) ? (size_t)idx : MI_SIZE_BITS);
#elif mi_has_builtinz(ctz)
return (x!=0 ? (size_t)mi_builtinz(ctz)(x) : MI_SIZE_BITS);
#elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
return (x!=0 ? _tzcnt_u64(x) : MI_SIZE_BITS); // ensure it still works on non-BMI1 cpu's as well
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
if (x==0) return MI_SIZE_BITS; // test explicitly for `x==0` to avoid codegen bug (issue #1071)
unsigned long idx; mi_msc_builtinz(_BitScanForward)(&idx, x);
return (size_t)idx;
#elif defined(__GNUC__) && MI_ARCH_X86
size_t r = MI_SIZE_BITS;
#elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
size_t r = MI_SIZE_BITS; // bsf leaves destination unmodified if the argument is 0 (see <https://github.com/llvm/llvm-project/pull/102885>)
__asm ("bsf\t%1, %0" : "+r"(r) : "r"(x) : "cc");
return r;
#elif MI_HAS_FAST_POPCOUNT
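Every branch of mi_ctz above keeps the same convention: the result for x==0 is MI_SIZE_BITS. A portable fallback in that style, shown only as a sketch (the real _mi_ctz_generic is not part of this diff):

#include <stddef.h>
#include <stdint.h>

#define SIZE_BITS 64   // stand-in for MI_SIZE_BITS on a 64-bit build

static size_t ctz_generic(uint64_t x) {
  if (x == 0) return SIZE_BITS;             // same convention as mi_ctz
  size_t n = 0;
  while ((x & 1) == 0) { x >>= 1; n++; }    // count trailing zero bits
  return n;
}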
@ -257,18 +222,17 @@ static inline size_t mi_ctz(size_t x) {
}

static inline size_t mi_clz(size_t x) {
// we don't optimize anymore to lzcnt as there are still non BMI1 cpu's around (like Intel Celeron, see issue #1016)
// on pre-haswell cpu's lzcnt gets executed as bsr which is not equivalent (at it returns the bit position)
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 lzcnt is defined for 0
size_t r;
__asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
return r;
#elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
return _lzcnt_u64(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long idx;
return (mi_msc_builtinz(_BitScanReverse)(&idx, x) ? MI_SIZE_BITS - 1 - (size_t)idx : MI_SIZE_BITS);
#elif mi_has_builtinz(clz)
return (x!=0 ? (size_t)mi_builtinz(clz)(x) : MI_SIZE_BITS);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
if (x==0) return MI_SIZE_BITS; // test explicitly for `x==0` to avoid codegen bug (issue #1071)
unsigned long idx; mi_msc_builtinz(_BitScanReverse)(&idx, x);
return (MI_SIZE_BITS - 1 - (size_t)idx);
#elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
if (x==0) return MI_SIZE_BITS;
size_t r;
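The _BitScanReverse/bsr branches rely on the identity clz(x) == MI_SIZE_BITS - 1 - bsr(x) for non-zero x, since bsr yields the index of the highest set bit rather than a count. A small self-contained check of that identity (illustrative only):

#include <assert.h>
#include <stdint.h>

static unsigned bsr64(uint64_t x) {   // index of the highest set bit, requires x != 0
  unsigned i = 0;
  while (x >>= 1) { i++; }
  return i;
}

int main(void) {
  const uint64_t x = 0xF0;            // highest set bit at index 7
  assert(bsr64(x) == 7);
  assert(64 - 1 - bsr64(x) == 56);    // 0xF0 has 56 leading zeros in a 64-bit word
  return 0;
}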
@ -297,11 +261,9 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
bool is_zero;
__asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" );
return !is_zero;
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
if (x==0) return false; // test explicitly for `x==0` to avoid codegen bug (issue #1071)
unsigned long i; mi_msc_builtinz(_BitScanForward)(&i, x);
*idx = (size_t)i;
return true;
#elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long i;
return (mi_msc_builtinz(_BitScanForward)(&i, x) ? (*idx = (size_t)i, true) : false);
#else
return (x!=0 ? (*idx = mi_ctz(x), true) : false);
#endif
@ -311,11 +273,14 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bsr(size_t x, size_t* idx) {
#if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
if (x==0) return false; // test explicitly for `x==0` to avoid codegen bug (issue #1071)
unsigned long i; mi_msc_builtinz(_BitScanReverse)(&i, x);
*idx = (size_t)i;
return true;
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9)
// on x64 the carry flag is set on zero which gives better codegen
bool is_zero;
__asm ("lzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc");
return !is_zero;
#elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long i;
return (mi_msc_builtinz(_BitScanReverse)(&i, x) ? (*idx = (size_t)i, true) : false);
#else
return (x!=0 ? (*idx = MI_SIZE_BITS - 1 - mi_clz(x), true) : false);
#endif
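Helpers like mi_bsf (a boolean result plus an out-parameter index) are typically used to scan set bits from least to most significant. A self-contained analog, using a compiler builtin as a stand-in for the real implementation (assumes GCC/Clang):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static inline bool bsf64(uint64_t x, size_t* idx) {
  if (x == 0) return false;                 // same contract: false when no bit is set
  *idx = (size_t)__builtin_ctzll(x);        // index of the lowest set bit
  return true;
}

static void visit_set_bits(uint64_t bits) {
  size_t i;
  while (bsf64(bits, &i)) {
    // ... handle bit i ...
    bits &= (bits - 1);                     // clear the lowest set bit and continue
  }
}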
@ -223,7 +223,7 @@ void* _mi_arenas_alloc_aligned(mi_subproc_t* subproc, size_t size, size_
void _mi_arenas_free(void* p, size_t size, mi_memid_t memid);
bool _mi_arenas_contain(const void* p);
void _mi_arenas_collect(bool force_purge, bool visit_all, mi_tld_t* tld);
void _mi_arenas_unsafe_destroy_all(mi_tld_t* tld);
void _mi_arenas_unsafe_destroy_all(mi_subproc_t* subproc);

mi_page_t* _mi_arenas_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment);
void _mi_arenas_page_free(mi_page_t* page, mi_tld_t* tld);
@ -238,7 +238,7 @@ bool _mi_meta_is_meta_page(void* p);

// "page-map.c"
bool _mi_page_map_init(void);
void _mi_page_map_register(mi_page_t* page);
mi_decl_nodiscard bool _mi_page_map_register(mi_page_t* page);
void _mi_page_map_unregister(mi_page_t* page);
void _mi_page_map_unregister_range(void* start, size_t size);
mi_page_t* _mi_safe_ptr_page(const void* p);
@ -604,7 +604,8 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) {
#define MI_PAGE_MAP_SHIFT (MI_MAX_VABITS - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT)
#define MI_PAGE_MAP_COUNT (MI_ZU(1) << MI_PAGE_MAP_SHIFT)

extern mi_decl_hidden _Atomic(mi_page_t**)* _mi_page_map;
typedef mi_page_t** mi_submap_t;
extern mi_decl_hidden _Atomic(mi_submap_t)* _mi_page_map;

static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) {
const size_t u = (size_t)((uintptr_t)p / MI_ARENA_SLICE_SIZE);
@ -612,7 +613,7 @@ static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) {
return (u / MI_PAGE_MAP_SUB_COUNT);
}

static inline mi_page_t** _mi_page_map_at(size_t idx) {
static inline mi_submap_t _mi_page_map_at(size_t idx) {
return mi_atomic_load_ptr_relaxed(mi_page_t*, &_mi_page_map[idx]);
}
@ -625,7 +626,7 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) {
static inline mi_page_t* _mi_checked_ptr_page(const void* p) {
size_t sub_idx;
const size_t idx = _mi_page_map_index(p, &sub_idx);
mi_page_t** const sub = _mi_page_map_at(idx);
mi_submap_t const sub = _mi_page_map_at(idx);
if mi_unlikely(sub == NULL) return NULL;
return sub[sub_idx];
}
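To make the two-level lookup above concrete: the root _mi_page_map holds pointers to sub-maps, and a NULL sub-map means the address is not managed. A stand-alone analog with made-up sizes (the real constants are MI_ARENA_SLICE_SIZE, MI_PAGE_MAP_SUB_COUNT and MI_PAGE_MAP_COUNT; this is a sketch, not mimalloc's code):

#include <stddef.h>
#include <stdint.h>

#define SLICE_SIZE  (64 * 1024)   // placeholder for MI_ARENA_SLICE_SIZE
#define SUB_COUNT   4096          // placeholder for MI_PAGE_MAP_SUB_COUNT
#define ROOT_COUNT  4096          // placeholder for MI_PAGE_MAP_COUNT

typedef struct page_s page_t;
typedef page_t** submap_t;

static submap_t root_map[ROOT_COUNT];   // committed and filled on demand in the real code

static page_t* checked_ptr_page(const void* p) {
  // assumes p lies below the address range covered by the map
  const size_t u       = (size_t)((uintptr_t)p / SLICE_SIZE);
  const size_t sub_idx = u % SUB_COUNT;    // index within the sub-map
  const size_t idx     = u / SUB_COUNT;    // index of the sub-map itself
  const submap_t sub   = root_map[idx];    // the real code uses a relaxed atomic load
  if (sub == NULL) return NULL;            // unmapped region: no page here
  return sub[sub_idx];
}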
@ -22,6 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include <mimalloc-stats.h>
#include <stddef.h> // ptrdiff_t
#include <stdint.h> // uintptr_t, uint16_t, etc
#include <stdbool.h> // bool
#include <limits.h> // SIZE_MAX etc.
#include <errno.h> // error codes
#include "bits.h" // size defines (MI_INTPTR_SIZE etc), bit operations
src/arena.c (41)
@ -684,7 +684,7 @@ static mi_page_t* mi_arenas_page_alloc_fresh(size_t slice_count, size_t block_si
commit_size = _mi_align_up(block_start + block_size, MI_PAGE_MIN_COMMIT_SIZE);
if (commit_size > page_noguard_size) { commit_size = page_noguard_size; }
bool is_zero;
if (!mi_arena_commit( mi_memid_arena(memid), page, commit_size, &is_zero, 0)) {
if mi_unlikely(!mi_arena_commit( mi_memid_arena(memid), page, commit_size, &is_zero, 0)) {
_mi_arenas_free(page, alloc_size, memid);
return NULL;
}
@ -710,7 +710,10 @@ static mi_page_t* mi_arenas_page_alloc_fresh(size_t slice_count, size_t block_si
mi_page_try_claim_ownership(page);

// register in the page map
_mi_page_map_register(page);
if mi_unlikely(!_mi_page_map_register(page)) {
_mi_arenas_free( page, alloc_size, memid );
return NULL;
}

// stats
mi_tld_stat_increase(tld, pages, 1);
@ -990,7 +993,7 @@ void _mi_arenas_page_unabandon(mi_page_t* page) {
Arena free
----------------------------------------------------------- */
static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slices);
static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld);
static void mi_arenas_try_purge(bool force, bool visit_all, mi_subproc_t* subproc, size_t tseq);

void _mi_arenas_free(void* p, size_t size, mi_memid_t memid) {
if (p==NULL) return;
@ -1051,7 +1054,7 @@ void _mi_arenas_free(void* p, size_t size, mi_memid_t memid) {

// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired
void _mi_arenas_collect(bool force_purge, bool visit_all, mi_tld_t* tld) {
mi_arenas_try_purge(force_purge, visit_all, tld);
mi_arenas_try_purge(force_purge, visit_all, tld->subproc, tld->thread_seq);
}
@ -1085,9 +1088,8 @@ bool _mi_arenas_contain(const void* p) {
// for dynamic libraries that are unloaded and need to release all their allocated memory.
static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) {
mi_assert_internal(subproc != NULL);
const size_t max_arena = mi_arenas_get_count(subproc);
size_t new_max_arena = 0;
for (size_t i = 0; i < max_arena; i++) {
const size_t arena_count = mi_arenas_get_count(subproc);
for (size_t i = 0; i < arena_count; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]);
if (arena != NULL) {
// mi_lock_done(&arena->abandoned_visit_lock);
@ -1097,18 +1099,17 @@ static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) {
}
}
}

// try to lower the max arena.
size_t expected = max_arena;
mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, new_max_arena);
size_t expected = arena_count;
mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, 0);
}

// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
// for dynamic libraries that are unloaded and need to release all their allocated memory.
void _mi_arenas_unsafe_destroy_all(mi_tld_t* tld) {
mi_arenas_unsafe_destroy(tld->subproc);
_mi_arenas_collect(true /* force purge */, true /* visit all*/, tld); // purge non-owned arenas
void _mi_arenas_unsafe_destroy_all(mi_subproc_t* subproc) {
mi_arenas_unsafe_destroy(subproc);
// mi_arenas_try_purge(true /* force purge */, true /* visit all*/, subproc, 0 /* thread seq */); // purge non-owned arenas
}
@ -1772,14 +1773,13 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force)
}

static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld)
static void mi_arenas_try_purge(bool force, bool visit_all, mi_subproc_t* subproc, size_t tseq)
{
// try purge can be called often so try to only run when needed
const long delay = mi_arena_purge_delay();
if (_mi_preloading() || delay <= 0) return; // nothing will be scheduled

// check if any arena needs purging?
mi_subproc_t* subproc = tld->subproc;
const mi_msecs_t now = _mi_clock_now();
const mi_msecs_t arenas_expire = mi_atomic_loadi64_acquire(&subproc->purge_expire);
if (!visit_all && !force && (arenas_expire == 0 || arenas_expire > now)) return;
@ -1793,7 +1793,7 @@ static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld)
{
// increase global expire: at most one purge per delay cycle
if (arenas_expire > now) { mi_atomic_storei64_release(&subproc->purge_expire, now + (delay/10)); }
const size_t arena_start = tld->thread_seq % max_arena;
const size_t arena_start = tseq % max_arena;
size_t max_purge_count = (visit_all ? max_arena : (max_arena/4)+1);
bool all_visited = true;
bool any_purged = false;
@ -1894,12 +1894,12 @@ static bool mi_arena_page_register(size_t slice_index, size_t slice_count, mi_ar
mi_assert_internal(slice_count == 1);
mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
mi_assert_internal(mi_bitmap_is_setN(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index, 1));
_mi_page_map_register(page);
if (!_mi_page_map_register(page)) return false; // break
mi_assert_internal(_mi_ptr_page(page)==page);
return true;
}

static bool mi_arena_pages_reregister(mi_arena_t* arena) {
mi_decl_nodiscard static bool mi_arena_pages_reregister(mi_arena_t* arena) {
return _mi_bitmap_forall_set(arena->pages, &mi_arena_page_register, arena, NULL);
}
@ -1979,7 +1979,10 @@ mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_commit_fun_t* c
if (!mi_arenas_add(arena->subproc, arena, arena_id)) {
return false;
}
mi_arena_pages_reregister(arena);
if (!mi_arena_pages_reregister(arena)) {
// todo: clear arena entry in the subproc?
return false;
}

// adjust abandoned page count
for (size_t bin = 0; bin < MI_BIN_COUNT; bin++) {
@ -711,7 +711,7 @@ void mi_process_init(void) mi_attr_noexcept {
mi_detect_cpu_features();
_mi_stats_init();
_mi_os_init();
_mi_page_map_init();
_mi_page_map_init(); // this could fail.. should we abort in that case?
mi_heap_main_init();
mi_tld_main_init();
// the following two can potentially allocate (on freeBSD for locks and thread keys)
@ -778,7 +778,7 @@ void mi_cdecl mi_process_done(void) mi_attr_noexcept {
if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
mi_heap_collect(heap, true /* force */);
_mi_heap_unsafe_destroy_all(heap); // forcefully release all memory held by all heaps (of this thread only!)
_mi_arenas_unsafe_destroy_all(heap->tld);
_mi_arenas_unsafe_destroy_all(_mi_subproc_main());
_mi_page_map_unsafe_destroy(_mi_subproc_main());
}
//_mi_page_map_unsafe_destroy(_mi_subproc_main());
src/page-map.c (108)
@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include "bitmap.h"

static void mi_page_map_cannot_commit(void) {
_mi_error_message(EFAULT,"unable to commit memory for the page address map\n");
_mi_warning_message("unable to commit the allocation page-map on-demand\n" );
}

#if MI_PAGE_MAP_FLAT
@ -32,7 +32,7 @@ static mi_memid_t mi_page_map_memid;
#define MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT MI_ARENA_SLICE_SIZE
static mi_bitmap_t* mi_page_map_commit; // one bit per committed 64 KiB entries

static void mi_page_map_ensure_committed(size_t idx, size_t slice_count);
mi_decl_nodiscard static bool mi_page_map_ensure_committed(size_t idx, size_t slice_count);

bool _mi_page_map_init(void) {
size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS);
@ -71,7 +71,10 @@ bool _mi_page_map_init(void) {

// commit the first part so NULL pointers get resolved without an access violation
if (!commit) {
mi_page_map_ensure_committed(0, 1);
if (!mi_page_map_ensure_committed(0, 1)) {
mi_page_map_cannot_commit();
return false;
}
}
_mi_page_map[0] = 1; // so _mi_ptr_page(NULL) == NULL
mi_assert_internal(_mi_ptr_page(NULL)==NULL);
@ -90,7 +93,7 @@ void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) {
}

static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
static bool mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
// is the page map area that contains the page address committed?
// we always set the commit bits so we can track what ranges are in-use.
// we only actually commit if the map wasn't committed fully already.
@ -103,7 +106,10 @@ static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
bool is_zero;
uint8_t* const start = _mi_page_map + (i * MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT);
const size_t size = MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT;
if (!_mi_os_commit(start, size, &is_zero)) return;
if (!_mi_os_commit(start, size, &is_zero)) {
mi_page_map_cannot_commit();
return false;
}
if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start, size); }
mi_bitmap_set(mi_page_map_commit, i);
}
@ -113,6 +119,7 @@ static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
_mi_page_map[idx] = 0;
_mi_page_map[idx+slice_count-1] = 0;
#endif
return true;
}
@ -124,25 +131,28 @@ static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t*
return _mi_page_map_index(page);
}

void _mi_page_map_register(mi_page_t* page) {
bool _mi_page_map_register(mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access!
if mi_unlikely(_mi_page_map == NULL) {
if (!_mi_page_map_init()) return;
if (!_mi_page_map_init()) return false;
}
mi_assert(_mi_page_map!=NULL);
uint8_t* page_start;
size_t slice_count;
const size_t idx = mi_page_map_get_idx(page, &page_start, &slice_count);

mi_page_map_ensure_committed(idx, slice_count);
if (!mi_page_map_ensure_committed(idx, slice_count)) {
return false;
}

// set the offsets
for (size_t i = 0; i < slice_count; i++) {
mi_assert_internal(i < 128);
_mi_page_map[idx + i] = (uint8_t)(i+1);
}
return true;
}

void _mi_page_map_unregister(mi_page_t* page) {
@ -158,7 +168,10 @@ void _mi_page_map_unregister(mi_page_t* page) {
void _mi_page_map_unregister_range(void* start, size_t size) {
const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE);
const uintptr_t index = _mi_page_map_index(start);
mi_page_map_ensure_committed(index, slice_count); // we commit the range in total; todo: scan the commit bits and clear only those ranges?
// todo: scan the commit bits and clear only those ranges?
if (!mi_page_map_ensure_committed(index, slice_count)) { // we commit the range in total;
return;
}
_mi_memzero(&_mi_page_map[index], slice_count);
}
@ -180,8 +193,9 @@ mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_att

// A 2-level page map
#define MI_PAGE_MAP_SUB_SIZE (MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*))
#define MI_PAGE_MAP_ENTRIES_PER_CBIT (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS)

mi_decl_cache_align _Atomic(mi_page_t**)* _mi_page_map;
mi_decl_cache_align _Atomic(mi_submap_t)* _mi_page_map;
static size_t mi_page_map_count;
static void* mi_page_map_max_address;
static mi_memid_t mi_page_map_memid;

@ -189,7 +203,9 @@ static mi_memid_t mi_page_map_memid;
// divide the main map in 64 (`MI_BFIELD_BITS`) parts commit those parts on demand
static _Atomic(mi_bfield_t) mi_page_map_commit;

#define MI_PAGE_MAP_ENTRIES_PER_CBIT (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS)
mi_decl_nodiscard static bool mi_page_map_ensure_committed(size_t idx, mi_submap_t* submap);
mi_decl_nodiscard static bool mi_page_map_ensure_submap_at(size_t idx, mi_submap_t* submap);
static bool mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count);

static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) {
mi_bfield_t commit = mi_atomic_load_relaxed(&mi_page_map_commit);
@ -199,16 +215,19 @@ static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) {
return ((commit & (MI_ZU(1) << bit_idx)) != 0);
}

static mi_page_t** mi_page_map_ensure_committed(size_t idx) {
static bool mi_page_map_ensure_committed(size_t idx, mi_submap_t* submap) {
mi_assert_internal(submap!=NULL && *submap==NULL);
size_t bit_idx;
if mi_unlikely(!mi_page_map_is_committed(idx, &bit_idx)) {
uint8_t* start = (uint8_t*)&_mi_page_map[bit_idx * MI_PAGE_MAP_ENTRIES_PER_CBIT];
if (!_mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_page_t**), NULL)) {
return NULL;
if (!_mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_submap_t), NULL)) {
mi_page_map_cannot_commit();
return false;
}
mi_atomic_or_acq_rel(&mi_page_map_commit, MI_ZU(1) << bit_idx);
}
return mi_atomic_load_ptr_acquire(mi_page_t*, &_mi_page_map[idx]); // _mi_page_map_at(idx);
*submap = mi_atomic_load_ptr_acquire(mi_page_t*, &_mi_page_map[idx]); // acquire _mi_page_map_at(idx);
return true;
}

// initialize the page map
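The commit tracking above keeps a single bit-field (mi_page_map_commit) with one bit per 1/64th (MI_BFIELD_BITS) of the root map; a part is committed by the OS on first use and then marked. A reduced stand-alone sketch of the same idea with placeholder sizes and names (not mimalloc's actual code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define ROOT_COUNT        4096                    // placeholder root-map entry count
#define ENTRIES_PER_CBIT  (ROOT_COUNT / 64)       // cf. MI_PAGE_MAP_ENTRIES_PER_CBIT

static _Atomic uint64_t commit_mask;              // one bit per committed part

static bool part_is_committed(size_t idx, size_t* bit_idx) {
  *bit_idx = idx / ENTRIES_PER_CBIT;              // which 1/64th of the root map?
  const uint64_t mask = atomic_load_explicit(&commit_mask, memory_order_relaxed);
  return ((mask & (UINT64_C(1) << *bit_idx)) != 0);
}

static void mark_part_committed(size_t bit_idx) { // after a successful OS commit
  atomic_fetch_or_explicit(&commit_mask, UINT64_C(1) << bit_idx, memory_order_acq_rel);
}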
@ -258,7 +277,11 @@ bool _mi_page_map_init(void) {
if (!mi_page_map_memid.initially_zero) { // initialize low addresses with NULL
_mi_memzero_aligned(sub0, submap_size);
}
mi_page_map_ensure_committed(0);
mi_submap_t nullsub = NULL;
if (!mi_page_map_ensure_committed(0,&nullsub)) {
mi_page_map_cannot_commit();
return false;
}
mi_atomic_store_ptr_release(mi_page_t*, &_mi_page_map[0], sub0);

mi_assert_internal(_mi_ptr_page(NULL)==NULL);
@ -273,7 +296,7 @@ void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) {
for (size_t idx = 1; idx < mi_page_map_count; idx++) { // skip entry 0 (as we allocate that submap at the end of the page_map)
// free all sub-maps
if (mi_page_map_is_committed(idx, NULL)) {
mi_page_t** sub = _mi_page_map_at(idx);
mi_submap_t sub = _mi_page_map_at(idx);
if (sub != NULL) {
mi_memid_t memid = _mi_memid_create_os(sub, MI_PAGE_MAP_SUB_SIZE, true, false, false);
_mi_os_free_ex(memid.mem.os.base, memid.mem.os.size, true, memid, subproc);
@ -290,41 +313,62 @@ void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) {
}

static mi_page_t** mi_page_map_ensure_submap_at(size_t idx) {
mi_page_t** sub = mi_page_map_ensure_committed(idx);
static bool mi_page_map_ensure_submap_at(size_t idx, mi_submap_t* submap) {
mi_assert_internal(submap!=NULL && *submap==NULL);
mi_submap_t sub = NULL;
if (!mi_page_map_ensure_committed(idx, &sub)) {
return false;
}
if mi_unlikely(sub == NULL) {
// sub map not yet allocated, alloc now
mi_memid_t memid;
mi_page_t** expect = sub;
const size_t submap_size = MI_PAGE_MAP_SUB_SIZE;
sub = (mi_page_t**)_mi_os_zalloc(submap_size, &memid);
sub = (mi_submap_t)_mi_os_zalloc(submap_size, &memid);
if (sub==NULL) {
_mi_error_message(EFAULT, "internal error: unable to extend the page map\n");
return NULL;
_mi_warning_message("internal error: unable to extend the page map\n");
return false;
}
if (!mi_atomic_cas_ptr_strong_acq_rel(mi_page_t*, &_mi_page_map[idx], &expect, sub)) {
// another thread already allocated it.. free and continue
_mi_os_free(sub, submap_size, memid);
sub = expect;
}
}
mi_assert_internal(sub!=NULL);
}
}
return sub;
*submap = sub;
return true;
}

static void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
static bool mi_page_map_set_range_prim(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
// is the page map area that contains the page address committed?
while (slice_count > 0) {
mi_page_t** sub = mi_page_map_ensure_submap_at(idx);
mi_submap_t sub = NULL;
if (!mi_page_map_ensure_submap_at(idx, &sub)) {
return false;
};
mi_assert_internal(sub!=NULL);
// set the offsets for the page
while (sub_idx < MI_PAGE_MAP_SUB_COUNT) {
while (slice_count > 0 && sub_idx < MI_PAGE_MAP_SUB_COUNT) {
sub[sub_idx] = page;
slice_count--; if (slice_count == 0) return;
slice_count--;
sub_idx++;
}
idx++; // potentially wrap around to the next idx
sub_idx = 0;
}
return true;
}

static bool mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
if mi_unlikely(!mi_page_map_set_range_prim(page,idx,sub_idx,slice_count)) {
// failed to commit, call again to reset the page pointer if needed
if (page!=NULL) {
mi_page_map_set_range_prim(NULL,idx,sub_idx,slice_count);
}
return false;
}
return true;
}

static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slice_count) {
@ -335,18 +379,18 @@ static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slic
return _mi_page_map_index(page, sub_idx);
}

void _mi_page_map_register(mi_page_t* page) {
bool _mi_page_map_register(mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access!
if mi_unlikely(_mi_page_map == NULL) {
if (!_mi_page_map_init()) return;
if (!_mi_page_map_init()) return false;
}
mi_assert(_mi_page_map!=NULL);
size_t slice_count;
size_t sub_idx;
const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count);
mi_page_map_set_range(page, idx, sub_idx, slice_count);
return mi_page_map_set_range(page, idx, sub_idx, slice_count);
}

void _mi_page_map_unregister(mi_page_t* page) {
@ -33,6 +33,7 @@ static void strdup_test(); // issue #445
static void heap_thread_free_huge();
static void test_std_string(); // issue #697
static void test_thread_local(); // issue #944
static void test_thread_leak(); // issue #1104
// static void test_mixed0(); // issue #942
static void test_mixed1(); // issue #942
static void test_stl_allocators();
@ -45,11 +46,12 @@ static void test_dep() { };
#endif

int main() {
mi_stats_reset(); // ignore earlier allocations
//mi_stats_reset(); // ignore earlier allocations
//various_tests();
//test_mixed1();

test_dep();
// test_dep();
test_thread_leak();

//test_std_string();
//test_thread_local();
@ -68,7 +70,7 @@ int main() {
test_mt_shutdown();
*/
//fail_aslr();
mi_stats_print(NULL);
//mi_stats_print(NULL);
return 0;
}
@ -378,6 +380,32 @@ static void heap_thread_free_huge() {
}
}

static std::atomic<long> gsum;

static void local_alloc() {
long sum = 0;
for(int i = 0; i < 1000000; i++) {
const int n = 1 + std::rand() % 1000;
uint8_t* p = (uint8_t*)calloc(n, 1);
p[0] = 1;
sum += p[std::rand() % n];
if ((std::rand() % 100) > 24) {
free(p);
}
}
gsum += sum;
}

static void test_thread_leak() {
std::vector<std::thread> threads;
for (int i=1; i<=100; ++i) {
threads.emplace_back(std::thread(&local_alloc));
}
for (auto& th : threads) {
th.join();
}
}

static void test_mt_shutdown()
{