From 1e2221f5126fa3686cff9fd656842cf35059b4e6 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 19:28:53 -0800 Subject: [PATCH 1/8] fix signed/unsigned; fix heap_destroy assert failure --- src/heap.c | 3 ++- src/page-map.c | 13 +++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/heap.c b/src/heap.c index 412c6465..a1b06c6b 100644 --- a/src/heap.c +++ b/src/heap.c @@ -340,6 +340,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ // mi_page_free(page,false); page->next = NULL; page->prev = NULL; + mi_page_set_heap(page, NULL); _mi_arena_page_free(page); return true; // keep going @@ -507,7 +508,7 @@ bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena_id) { // reinit direct pages (as we may be in a different process) mi_assert_internal(heap->page_count == 0); - for (int i = 0; i < MI_PAGES_DIRECT; i++) { + for (size_t i = 0; i < MI_PAGES_DIRECT; i++) { heap->pages_free_direct[i] = (mi_page_t*)&_mi_page_empty; } diff --git a/src/page-map.c b/src/page-map.c index 7b74c711..d6517f72 100644 --- a/src/page-map.c +++ b/src/page-map.c @@ -9,6 +9,14 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc/internal.h" #include "bitmap.h" +// The page-map contains a byte for each 64kb slice in the address space. +// For an address `a` where `n = _mi_page_map[a >> 16]`: +// 0 = unused +// 1 = the slice at `a & ~0xFFFF` is a mimalloc page. +// 1 < n << 127 = the slice is part of a page, starting at `(((a>>16) - n - 1) << 16)`. +// +// 1 byte per slice => 1 GiB page map = 2^30 slices of 2^16 = 2^46 = 64 TiB address space. +// 4 GiB virtual for 256 TiB address space (48 bit) (and 64 KiB for 4 GiB address space (on 32-bit)). mi_decl_cache_align uint8_t* _mi_page_map = NULL; static bool mi_page_map_all_committed = false; static size_t mi_page_map_entries_per_commit_bit = MI_ARENA_SLICE_SIZE; @@ -24,10 +32,11 @@ bool _mi_page_map_init(void) { size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS); if (vbits == 0) { vbits = _mi_os_virtual_address_bits(); + #if MI_ARCH_X64 if (vbits >= 48) { vbits = 47; } + #endif } - // 1 byte per block = 2 GiB for 128 TiB address space (48 bit = 256 TiB address space) - // 64 KiB for 4 GiB address space (on 32-bit) + mi_page_map_max_address = (void*)(MI_PU(1) << vbits); const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_SLICE_SHIFT)); From 56cbddfc7e39ec0a4ea7585641bf333495b83604 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 23:08:52 -0800 Subject: [PATCH 2/8] initial work on a two-level page-map --- include/mimalloc/bits.h | 8 ++ include/mimalloc/internal.h | 64 +++++++++++++--- src/page-map.c | 143 +++++++++++++++++++++++++++++++++++- test/test-stress.c | 4 +- 4 files changed, 206 insertions(+), 13 deletions(-) diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h index 32b9d528..fb6c2e8c 100644 --- a/include/mimalloc/bits.h +++ b/include/mimalloc/bits.h @@ -68,6 +68,14 @@ typedef int32_t mi_ssize_t; #define MI_MiB (MI_KiB*MI_KiB) #define MI_GiB (MI_MiB*MI_KiB) +#if MI_INTPTR_SIZE > 4 +#define MI_MAX_VABITS (48) +#define MI_PAGE_MAP_FLAT 0 +#else +#define MI_MAX_VABITS (32) +#define MI_PAGE_MAP_FLAT 1 +#endif + /* -------------------------------------------------------------------------------- Architecture diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 208989e3..dbc45133 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -422,6 +422,14 @@ static 
inline bool mi_heap_is_initialized(mi_heap_t* heap) { return (heap != &_mi_heap_empty); } +static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) { + mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE)); + const size_t idx = _mi_wsize_from_size(size); + mi_assert_internal(idx < MI_PAGES_DIRECT); + return heap->pages_free_direct[idx]; +} + + //static inline uintptr_t _mi_ptr_cookie(const void* p) { // extern mi_heap_t _mi_heap_main; // mi_assert_internal(_mi_heap_main.cookie != 0); @@ -433,14 +441,9 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) { Pages ----------------------------------------------------------- */ -static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) { - mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE)); - const size_t idx = _mi_wsize_from_size(size); - mi_assert_internal(idx < MI_PAGES_DIRECT); - return heap->pages_free_direct[idx]; -} - +#if MI_PAGE_MAP_FLAT +// flat page-map committed on demand extern uint8_t* _mi_page_map; static inline uintptr_t _mi_page_map_index(const void* p) { @@ -465,16 +468,59 @@ static inline mi_page_t* _mi_ptr_page_ex(const void* p, bool* valid) { static inline mi_page_t* _mi_checked_ptr_page(const void* p) { bool valid; - mi_page_t* const page = _mi_ptr_page_ex(p,&valid); + mi_page_t* const page = _mi_ptr_page_ex(p, &valid); return (valid ? page : NULL); } +static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) { + return _mi_ptr_page_ex(p, NULL); +} + +#else + +// 2-level page map + +// one page-map directory = 64 KiB => covers 2^16 * 2^16 = 2^32 = 4 GiB address space +// the page-map needs 48-16-16 = 16 bits => 2^16 map directories = 2^16 * 2^3 = 2^19 = 512 KiB size. +// we commit the page-map directories on-demand. 
(2^16 * 2^16 = 2^32 ~= 4 GiB needed to cover 256 TeB) + +#define MI_PAGE_MAP_SUB_SHIFT (16) // 64 KiB +#define MI_PAGE_MAP_SUB_SIZE (MI_ZU(1) << MI_PAGE_MAP_SUB_SHIFT) +#define MI_PAGE_MAP_SHIFT (MI_MAX_VABITS - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT) +#define MI_PAGE_MAP_COUNT (MI_ZU(1) << MI_PAGE_MAP_SHIFT) + +extern uint8_t** _mi_page_map; + +static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) { + const uintptr_t u = (uintptr_t)p / MI_ARENA_SLICE_SIZE; + if (sub_idx != NULL) { *sub_idx = (uint32_t)u % MI_PAGE_MAP_SUB_SIZE; } + return (size_t)(u / MI_PAGE_MAP_COUNT); +} + +static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) { + const uintptr_t u = (uintptr_t)p / MI_ARENA_SLICE_SIZE; + const uint8_t* const sub = _mi_page_map[u / MI_PAGE_MAP_COUNT]; + const uint8_t ofs = sub[(uint32_t)u % MI_PAGE_MAP_SUB_SIZE]; + return (mi_page_t*)((u - ofs + 1) * MI_ARENA_SLICE_SIZE); +} + +static inline mi_page_t* _mi_checked_ptr_page(const void* p) { + const uintptr_t u = (uintptr_t)p / MI_ARENA_SLICE_SIZE; + const uint8_t* const sub = _mi_page_map[u / MI_PAGE_MAP_COUNT]; + //if mi_unlikely(sub == NULL) { return NULL; } + const uint8_t ofs = sub[(uint32_t)u % MI_PAGE_MAP_SUB_SIZE]; + //if mi_unlikely(ofs == 0) { return NULL; } + return (mi_page_t*)((u - ofs + 1) * MI_ARENA_SLICE_SIZE); +} + +#endif + static inline mi_page_t* _mi_ptr_page(const void* p) { mi_assert_internal(p==NULL || mi_is_in_heap_region(p)); #if MI_DEBUG || defined(__APPLE__) return _mi_checked_ptr_page(p); #else - return _mi_ptr_page_ex(p,NULL); + return _mi_unchecked_ptr_page(p); #endif } diff --git a/src/page-map.c b/src/page-map.c index d6517f72..a814610f 100644 --- a/src/page-map.c +++ b/src/page-map.c @@ -9,6 +9,8 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc/internal.h" #include "bitmap.h" +#if MI_PAGE_MAP_FLAT + // The page-map contains a byte for each 64kb slice in the address space. // For an address `a` where `n = _mi_page_map[a >> 16]`: // 0 = unused @@ -17,6 +19,9 @@ terms of the MIT license. A copy of the license can be found in the file // // 1 byte per slice => 1 GiB page map = 2^30 slices of 2^16 = 2^46 = 64 TiB address space. // 4 GiB virtual for 256 TiB address space (48 bit) (and 64 KiB for 4 GiB address space (on 32-bit)). + +// 1MiB = 2^20*2^16 = 2^36 = 64GiB address space +// 2^12 pointers = 2^15 k = 32k mi_decl_cache_align uint8_t* _mi_page_map = NULL; static bool mi_page_map_all_committed = false; static size_t mi_page_map_entries_per_commit_bit = MI_ARENA_SLICE_SIZE; @@ -25,7 +30,7 @@ static mi_memid_t mi_page_map_memid; // (note: we need to initialize statically or otherwise C++ may run a default constructors after process initialization) -static mi_bitmap_t mi_page_map_commit = { MI_ATOMIC_VAR_INIT(MI_BITMAP_DEFAULT_CHUNK_COUNT), MI_ATOMIC_VAR_INIT(0), +sstatic mi_bitmap_t mi_page_map_commit = { MI_ATOMIC_VAR_INIT(MI_BITMAP_DEFAULT_CHUNK_COUNT), MI_ATOMIC_VAR_INIT(0), { 0 }, { {MI_ATOMIC_VAR_INIT(0)} }, {{{ MI_ATOMIC_VAR_INIT(0) }}} }; bool _mi_page_map_init(void) { @@ -101,7 +106,7 @@ static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* void _mi_page_map_register(mi_page_t* page) { mi_assert_internal(page != NULL); - mi_assert_internal(_mi_is_aligned(page,MI_PAGE_ALIGN)); + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access! 
if mi_unlikely(_mi_page_map == NULL) { if (!_mi_page_map_init()) return; @@ -151,3 +156,137 @@ mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_att return false; } } + +#else + +mi_decl_cache_align uint8_t** _mi_page_map = NULL; + +static void* mi_page_map_max_address = NULL; +static mi_memid_t mi_page_map_memid; + +bool _mi_page_map_init(void) { + size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS); + if (vbits == 0) { + vbits = _mi_os_virtual_address_bits(); + mi_assert_internal(vbits <= MI_MAX_VABITS); + } + + mi_page_map_max_address = (void*)(MI_PU(1) << vbits); + const size_t os_page_size = _mi_os_page_size(); + const size_t page_map_size = _mi_align_up(MI_ZU(1) << (vbits - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT + MI_INTPTR_SHIFT), os_page_size); + const size_t reserve_size = page_map_size + (2 * MI_PAGE_MAP_SUB_SIZE); + _mi_page_map = (uint8_t**)_mi_os_alloc_aligned(reserve_size, 1, true /* commit */, true, &mi_page_map_memid); + if (_mi_page_map==NULL) { + _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", reserve_size / MI_KiB); + return false; + } + if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) { + _mi_warning_message("the page map was committed but not zero initialized!\n"); + _mi_memzero_aligned(_mi_page_map, reserve_size); + } + + uint8_t* sub0 = (uint8_t*)_mi_page_map + page_map_size; + uint8_t* sub1 = sub0 + MI_PAGE_MAP_SUB_SIZE; + // initialize the first part so NULL pointers get resolved without an access violation + _mi_page_map[0] = sub0; + sub0[0] = 1; // so _mi_ptr_page(NULL) == NULL + // and initialize the 4GiB range where we were allocated + _mi_page_map[_mi_page_map_index(_mi_page_map,NULL)] = sub1; + + mi_assert_internal(_mi_ptr_page(NULL)==NULL); + return true; +} + +static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* sub_idx, size_t* slice_count) { + size_t page_size; + *page_start = mi_page_area(page, &page_size); + if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE - MI_ARENA_SLICE_SIZE; } // furthest interior pointer + *slice_count = mi_slice_count_of_size(page_size) + (((uint8_t*)*page_start - (uint8_t*)page)/MI_ARENA_SLICE_SIZE); // add for large aligned blocks + return _mi_page_map_index(page,sub_idx); +} + + +static inline void mi_page_map_set_range(size_t idx, size_t sub_idx, size_t slice_count, uint8_t (*set)(uint8_t ofs)) { + // is the page map area that contains the page address committed? + uint8_t ofs = 1; + while (slice_count > 0) { + uint8_t* sub = _mi_page_map[idx]; + if (sub == NULL) { + mi_memid_t memid; + sub = (uint8_t*)_mi_os_alloc(MI_PAGE_MAP_SUB_SIZE, &memid); + if (sub == NULL) { + _mi_error_message(EFAULT, "internal error: unable to extend the page map\n"); + return; // abort? + } + } + // set the offsets for the page + while (sub_idx < MI_PAGE_MAP_SUB_SIZE && slice_count > 0) { + sub[sub_idx] = set(ofs); + sub_idx++; + ofs++; + slice_count--; + } + sub_idx = 0; // potentially wrap around to the next idx + } +} + +static uint8_t set_ofs(uint8_t ofs) { + return ofs; +} + +void _mi_page_map_register(mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access! 
+ if mi_unlikely(_mi_page_map == NULL) { + if (!_mi_page_map_init()) return; + } + mi_assert(_mi_page_map!=NULL); + uint8_t* page_start; + size_t slice_count; + size_t sub_idx; + const size_t idx = mi_page_map_get_idx(page, &page_start, &sub_idx, &slice_count); + mi_page_map_set_range(idx, sub_idx, slice_count, &set_ofs); +} + +static uint8_t set_zero(uint8_t ofs) { + MI_UNUSED(ofs); + return 0; +} + + +void _mi_page_map_unregister(mi_page_t* page) { + mi_assert_internal(_mi_page_map != NULL); + // get index and count + uint8_t* page_start; + size_t slice_count; + size_t sub_idx; + const size_t idx = mi_page_map_get_idx(page, &page_start, &sub_idx, &slice_count); + // unset the offsets + mi_page_map_set_range(idx, sub_idx, slice_count, &set_zero); +} + +void _mi_page_map_unregister_range(void* start, size_t size) { + const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE); + size_t sub_idx; + const size_t idx = _mi_page_map_index(start, &sub_idx); + mi_page_map_set_range(idx, sub_idx, slice_count, &set_zero); +} + +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + + if mi_unlikely(p >= mi_page_map_max_address) return false; + size_t sub_idx; + const size_t idx = _mi_page_map_index(p, &sub_idx); + uint8_t* sub = _mi_page_map[idx]; + if (sub != NULL) { + return (sub[sub_idx] != 0); + } + else { + return false; + } +} + + +#endif + diff --git a/test/test-stress.c b/test/test-stress.c index 0920a02e..bbcded65 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -302,8 +302,8 @@ int main(int argc, char** argv) { mi_option_enable(mi_option_visit_abandoned); #endif #if !defined(NDEBUG) && !defined(USE_STD_MALLOC) - // mi_option_set(mi_option_arena_reserve, 32 * 1024 /* in kib = 32MiB */); - mi_option_set(mi_option_purge_delay,10); + mi_option_set(mi_option_arena_reserve, 32 * 1024 /* in kib = 32MiB */); + //mi_option_set(mi_option_purge_delay,10); #endif #ifndef USE_STD_MALLOC mi_stats_reset(); From c9b2d31665b9102114569ccf78be1328c2843fe7 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 23:17:11 -0800 Subject: [PATCH 3/8] fix page_map initialization --- src/page-map.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/page-map.c b/src/page-map.c index a814610f..403be079 100644 --- a/src/page-map.c +++ b/src/page-map.c @@ -214,6 +214,12 @@ static inline void mi_page_map_set_range(size_t idx, size_t sub_idx, size_t slic if (sub == NULL) { mi_memid_t memid; sub = (uint8_t*)_mi_os_alloc(MI_PAGE_MAP_SUB_SIZE, &memid); + uint8_t* expect = NULL; + if (!mi_atomic_cas_strong_acq_rel(((_Atomic(uint8_t*)*)&_mi_page_map[idx]), &expect, sub)) { + _mi_os_free(sub, MI_PAGE_MAP_SUB_SIZE, memid); + sub = expect; + mi_assert_internal(sub!=NULL); + } if (sub == NULL) { _mi_error_message(EFAULT, "internal error: unable to extend the page map\n"); return; // abort? 
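PATCH 4 below returns to the flat page-map whose encoding PATCH 1 documents: one byte per 64 KiB slice, where 0 marks an untracked slice, 1 marks the first slice of a mimalloc page, and a value n between 2 and 127 marks a slice that lies n-1 slices past its page start. A minimal sketch of that lookup, assuming a standalone byte array, 64 KiB slices, and illustrative names (page_map, ptr_to_page, SLICE_SHIFT) rather than mimalloc's own identifiers:

#include <stdint.h>
#include <stddef.h>

#define SLICE_SHIFT 16                         /* 64 KiB slices (assumed) */
extern uint8_t page_map[];                     /* one byte per slice; 0 = not a mimalloc page */

/* Map an (interior) pointer to the first slice of its page, or NULL if untracked. */
static void* ptr_to_page(const void* p) {
  const uintptr_t idx = (uintptr_t)p >> SLICE_SHIFT;   /* slice index of p */
  const uint8_t   ofs = page_map[idx];                 /* 1 = page starts here, n = n-1 slices back */
  if (ofs == 0) return NULL;                           /* address not owned by the allocator */
  return (void*)((idx - ofs + 1) << SLICE_SHIFT);      /* back up to the page start */
}

Since one byte can only encode an offset up to 127 slices, very large pages cannot mark every interior slice; the patches handle this by capping the registered slice count at the furthest valid interior pointer, as mi_page_map_get_idx does.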
From 93fa8d895ad7366285782cf1f1259fe427c4d631 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 22 Dec 2024 12:18:53 -0800 Subject: [PATCH 4/8] revert back to flat address map --- include/mimalloc/bits.h | 8 -- include/mimalloc/internal.h | 65 ++-------- src/free.c | 8 +- src/page-map.c | 248 ++++++++---------------------------- 4 files changed, 66 insertions(+), 263 deletions(-) diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h index fb6c2e8c..32b9d528 100644 --- a/include/mimalloc/bits.h +++ b/include/mimalloc/bits.h @@ -68,14 +68,6 @@ typedef int32_t mi_ssize_t; #define MI_MiB (MI_KiB*MI_KiB) #define MI_GiB (MI_MiB*MI_KiB) -#if MI_INTPTR_SIZE > 4 -#define MI_MAX_VABITS (48) -#define MI_PAGE_MAP_FLAT 0 -#else -#define MI_MAX_VABITS (32) -#define MI_PAGE_MAP_FLAT 1 -#endif - /* -------------------------------------------------------------------------------- Architecture diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index dbc45133..17c02941 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -169,6 +169,7 @@ bool _mi_page_map_init(void); void _mi_page_map_register(mi_page_t* page); void _mi_page_map_unregister(mi_page_t* page); void _mi_page_map_unregister_range(void* start, size_t size); +mi_page_t* _mi_safe_ptr_page(const void* p); // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; @@ -441,29 +442,18 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si Pages ----------------------------------------------------------- */ -#if MI_PAGE_MAP_FLAT - // flat page-map committed on demand extern uint8_t* _mi_page_map; -static inline uintptr_t _mi_page_map_index(const void* p) { - return (((uintptr_t)p) >> MI_ARENA_SLICE_SHIFT); +static inline size_t _mi_page_map_index(const void* p) { + return (size_t)((uintptr_t)p >> MI_ARENA_SLICE_SHIFT); } static inline mi_page_t* _mi_ptr_page_ex(const void* p, bool* valid) { - #if 1 - const uintptr_t idx = _mi_page_map_index(p); + const size_t idx = _mi_page_map_index(p); const size_t ofs = _mi_page_map[idx]; - if (valid != NULL) *valid = (ofs != 0); - return (mi_page_t*)((idx - ofs + 1) << MI_ARENA_SLICE_SHIFT); - #else - const uintptr_t idx = _mi_page_map_index(p); - const uintptr_t up = idx << MI_ARENA_SLICE_SHIFT; - __builtin_prefetch((void*)up); - const size_t ofs = _mi_page_map[idx]; - if (valid != NULL) *valid = (ofs != 0); - return (mi_page_t*)(up - ((ofs - 1) << MI_ARENA_SLICE_SHIFT)); - #endif + if (valid != NULL) { *valid = (ofs != 0); } + return (mi_page_t*)((((uintptr_t)p >> MI_ARENA_SLICE_SHIFT) + 1 - ofs) << MI_ARENA_SLICE_SHIFT); } static inline mi_page_t* _mi_checked_ptr_page(const void* p) { @@ -476,49 +466,10 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) { return _mi_ptr_page_ex(p, NULL); } -#else - -// 2-level page map - -// one page-map directory = 64 KiB => covers 2^16 * 2^16 = 2^32 = 4 GiB address space -// the page-map needs 48-16-16 = 16 bits => 2^16 map directories = 2^16 * 2^3 = 2^19 = 512 KiB size. -// we commit the page-map directories on-demand. 
(2^16 * 2^16 = 2^32 ~= 4 GiB needed to cover 256 TeB) - -#define MI_PAGE_MAP_SUB_SHIFT (16) // 64 KiB -#define MI_PAGE_MAP_SUB_SIZE (MI_ZU(1) << MI_PAGE_MAP_SUB_SHIFT) -#define MI_PAGE_MAP_SHIFT (MI_MAX_VABITS - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT) -#define MI_PAGE_MAP_COUNT (MI_ZU(1) << MI_PAGE_MAP_SHIFT) - -extern uint8_t** _mi_page_map; - -static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) { - const uintptr_t u = (uintptr_t)p / MI_ARENA_SLICE_SIZE; - if (sub_idx != NULL) { *sub_idx = (uint32_t)u % MI_PAGE_MAP_SUB_SIZE; } - return (size_t)(u / MI_PAGE_MAP_COUNT); -} - -static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) { - const uintptr_t u = (uintptr_t)p / MI_ARENA_SLICE_SIZE; - const uint8_t* const sub = _mi_page_map[u / MI_PAGE_MAP_COUNT]; - const uint8_t ofs = sub[(uint32_t)u % MI_PAGE_MAP_SUB_SIZE]; - return (mi_page_t*)((u - ofs + 1) * MI_ARENA_SLICE_SIZE); -} - -static inline mi_page_t* _mi_checked_ptr_page(const void* p) { - const uintptr_t u = (uintptr_t)p / MI_ARENA_SLICE_SIZE; - const uint8_t* const sub = _mi_page_map[u / MI_PAGE_MAP_COUNT]; - //if mi_unlikely(sub == NULL) { return NULL; } - const uint8_t ofs = sub[(uint32_t)u % MI_PAGE_MAP_SUB_SIZE]; - //if mi_unlikely(ofs == 0) { return NULL; } - return (mi_page_t*)((u - ofs + 1) * MI_ARENA_SLICE_SIZE); -} - -#endif - static inline mi_page_t* _mi_ptr_page(const void* p) { mi_assert_internal(p==NULL || mi_is_in_heap_region(p)); #if MI_DEBUG || defined(__APPLE__) - return _mi_checked_ptr_page(p); + return _mi_checked_ptr_page(p); #else return _mi_unchecked_ptr_page(p); #endif @@ -637,7 +588,7 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) { return (page->free != NULL); } - + // is the page not yet used up to its reserved space? static inline bool mi_page_is_expandable(const mi_page_t* page) { mi_assert_internal(page != NULL); diff --git a/src/free.c b/src/free.c index 88f784c7..d08123a2 100644 --- a/src/free.c +++ b/src/free.c @@ -145,14 +145,14 @@ static inline mi_page_t* mi_checked_ptr_page(const void* p, const char* msg) _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); return NULL; } - #endif - mi_page_t* const page = _mi_ptr_page(p); - #if MI_DEBUG + mi_page_t* const page = _mi_safe_ptr_page(p); if (page == NULL && p != NULL) { _mi_error_message(EINVAL, "%s: invalid pointer: %p\n", msg, p); } - #endif return page; + #else + return _mi_ptr_page(p); + #endif } // Free a block diff --git a/src/page-map.c b/src/page-map.c index 403be079..a4001359 100644 --- a/src/page-map.c +++ b/src/page-map.c @@ -9,60 +9,61 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc/internal.h" #include "bitmap.h" -#if MI_PAGE_MAP_FLAT -// The page-map contains a byte for each 64kb slice in the address space. -// For an address `a` where `n = _mi_page_map[a >> 16]`: +// The page-map contains a byte for each 64kb slice in the address space. +// For an address `a` where `ofs = _mi_page_map[a >> 16]`: // 0 = unused // 1 = the slice at `a & ~0xFFFF` is a mimalloc page. -// 1 < n << 127 = the slice is part of a page, starting at `(((a>>16) - n - 1) << 16)`. -// -// 1 byte per slice => 1 GiB page map = 2^30 slices of 2^16 = 2^46 = 64 TiB address space. -// 4 GiB virtual for 256 TiB address space (48 bit) (and 64 KiB for 4 GiB address space (on 32-bit)). +// 1 < ofs <= 127 = the slice is part of a page, starting at `(((a>>16) - ofs - 1) << 16)`. 
+// +// 1 byte per slice => 1 TiB address space needs a 2^14 * 2^16 = 16 MiB page map. +// A full 256 TiB address space (48 bit) needs a 4 GiB page map. +// A full 4 GiB address space (32 bit) needs only a 64 KiB page map. -// 1MiB = 2^20*2^16 = 2^36 = 64GiB address space -// 2^12 pointers = 2^15 k = 32k mi_decl_cache_align uint8_t* _mi_page_map = NULL; -static bool mi_page_map_all_committed = false; -static size_t mi_page_map_entries_per_commit_bit = MI_ARENA_SLICE_SIZE; -static void* mi_page_map_max_address = NULL; -static mi_memid_t mi_page_map_memid; +static void* mi_page_map_max_address = NULL; +static mi_memid_t mi_page_map_memid; +#define MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT MI_ARENA_SLICE_SIZE +static mi_bitmap_t* mi_page_map_commit; // one bit per committed 64 KiB entries -// (note: we need to initialize statically or otherwise C++ may run a default constructors after process initialization) -sstatic mi_bitmap_t mi_page_map_commit = { MI_ATOMIC_VAR_INIT(MI_BITMAP_DEFAULT_CHUNK_COUNT), MI_ATOMIC_VAR_INIT(0), - { 0 }, { {MI_ATOMIC_VAR_INIT(0)} }, {{{ MI_ATOMIC_VAR_INIT(0) }}} }; +static void mi_page_map_ensure_committed(size_t idx, size_t slice_count); bool _mi_page_map_init(void) { - size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS); + size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS); if (vbits == 0) { vbits = _mi_os_virtual_address_bits(); - #if MI_ARCH_X64 + #if MI_ARCH_X64 // canonical address is limited to the first 128 TiB if (vbits >= 48) { vbits = 47; } #endif } - + + // Allocate the page map and commit bits mi_page_map_max_address = (void*)(MI_PU(1) << vbits); const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_SLICE_SHIFT)); - - mi_page_map_entries_per_commit_bit = _mi_divide_up(page_map_size, MI_BITMAP_DEFAULT_BIT_COUNT); - // mi_bitmap_init(&mi_page_map_commit, MI_BITMAP_MIN_BIT_COUNT, true); - - mi_page_map_all_committed = (page_map_size <= 1*MI_MiB || mi_option_is_enabled(mi_option_debug_commit_full_pagemap)); // _mi_os_has_overcommit(); // commit on-access on Linux systems? - _mi_page_map = (uint8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid); - if (_mi_page_map==NULL) { + const bool commit = (page_map_size <= 1*MI_MiB || mi_option_is_enabled(mi_option_debug_commit_full_pagemap)); // _mi_os_has_overcommit(); // commit on-access on Linux systems? + const size_t commit_bits = _mi_divide_up(page_map_size, MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT); + const size_t bitmap_size = (commit ? 
0 : mi_bitmap_size(commit_bits, NULL)); + const size_t reserve_size = bitmap_size + page_map_size; + uint8_t* const base = (uint8_t*)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid); + if (base==NULL) { _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB); return false; } if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) { - _mi_warning_message("the page map was committed but not zero initialized!\n"); - _mi_memzero_aligned(_mi_page_map, page_map_size); + _mi_warning_message("internal: the page map was committed but not zero initialized!\n"); + _mi_memzero_aligned(base, reserve_size); } + if (bitmap_size > 0) { + mi_page_map_commit = (mi_bitmap_t*)base; + _mi_os_commit(mi_page_map_commit, bitmap_size, NULL); + mi_bitmap_init(mi_page_map_commit, commit_bits, true); + } + _mi_page_map = base + bitmap_size; + // commit the first part so NULL pointers get resolved without an access violation - if (!mi_page_map_all_committed) { - bool is_zero; - _mi_os_commit(_mi_page_map, _mi_os_page_size(), &is_zero); - if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(_mi_page_map, _mi_os_page_size()); } + if (!commit) { + mi_page_map_ensure_committed(0, 1); } _mi_page_map[0] = 1; // so _mi_ptr_page(NULL) == NULL mi_assert_internal(_mi_ptr_page(NULL)==NULL); @@ -70,30 +71,31 @@ bool _mi_page_map_init(void) { } static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) { - // is the page map area that contains the page address committed? + // is the page map area that contains the page address committed? // we always set the commit bits so we can track what ranges are in-use. // we only actually commit if the map wasn't committed fully already. 
- const size_t commit_bit_idx_lo = idx / mi_page_map_entries_per_commit_bit; - const size_t commit_bit_idx_hi = (idx + slice_count - 1) / mi_page_map_entries_per_commit_bit; - for (size_t i = commit_bit_idx_lo; i <= commit_bit_idx_hi; i++) { // per bit to avoid crossing over bitmap chunks - if (mi_bitmap_is_clearN(&mi_page_map_commit, i, 1)) { - // this may race, in which case we do multiple commits (which is ok) - if (!mi_page_map_all_committed) { + if (mi_page_map_commit != NULL) { + const size_t commit_idx = idx / MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT; + const size_t commit_idx_hi = (idx + slice_count - 1) / MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT; + for (size_t i = commit_idx; i <= commit_idx_hi; i++) { // per bit to avoid crossing over bitmap chunks + if (mi_bitmap_is_clear(mi_page_map_commit, i)) { + // this may race, in which case we do multiple commits (which is ok) bool is_zero; - uint8_t* const start = _mi_page_map + (i*mi_page_map_entries_per_commit_bit); - const size_t size = mi_page_map_entries_per_commit_bit; + uint8_t* const start = _mi_page_map + (i * MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT); + const size_t size = MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT; _mi_os_commit(start, size, &is_zero); - if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start, size); } + if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start, size); } + mi_bitmap_set(mi_page_map_commit, i); } - mi_bitmap_set(&mi_page_map_commit, i); } } #if MI_DEBUG > 0 _mi_page_map[idx] = 0; _mi_page_map[idx+slice_count-1] = 0; - #endif + #endif } + static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* slice_count) { size_t page_size; *page_start = mi_page_area(page, &page_size); @@ -102,8 +104,6 @@ static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* return _mi_page_map_index(page); } - - void _mi_page_map_register(mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); @@ -125,7 +125,6 @@ void _mi_page_map_register(mi_page_t* page) { } } - void _mi_page_map_unregister(mi_page_t* page) { mi_assert_internal(_mi_page_map != NULL); // get index and count @@ -143,156 +142,17 @@ void _mi_page_map_unregister_range(void* start, size_t size) { _mi_memzero(&_mi_page_map[index], slice_count); } -mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - // if mi_unlikely(_mi_page_map==NULL) { // happens on macOS during loading - // _mi_page_map_init(); - // } - if mi_unlikely(p >= mi_page_map_max_address) return false; - uintptr_t idx = ((uintptr_t)p >> MI_ARENA_SLICE_SHIFT); - if (mi_page_map_all_committed || mi_bitmap_is_setN(&mi_page_map_commit, idx/mi_page_map_entries_per_commit_bit, 1)) { - return (_mi_page_map[idx] != 0); - } - else { - return false; - } -} -#else - -mi_decl_cache_align uint8_t** _mi_page_map = NULL; - -static void* mi_page_map_max_address = NULL; -static mi_memid_t mi_page_map_memid; - -bool _mi_page_map_init(void) { - size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS); - if (vbits == 0) { - vbits = _mi_os_virtual_address_bits(); - mi_assert_internal(vbits <= MI_MAX_VABITS); - } - - mi_page_map_max_address = (void*)(MI_PU(1) << vbits); - const size_t os_page_size = _mi_os_page_size(); - const size_t page_map_size = _mi_align_up(MI_ZU(1) << (vbits - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT + MI_INTPTR_SHIFT), os_page_size); - const size_t reserve_size = page_map_size + (2 * MI_PAGE_MAP_SUB_SIZE); - _mi_page_map = 
(uint8_t**)_mi_os_alloc_aligned(reserve_size, 1, true /* commit */, true, &mi_page_map_memid); - if (_mi_page_map==NULL) { - _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", reserve_size / MI_KiB); - return false; - } - if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) { - _mi_warning_message("the page map was committed but not zero initialized!\n"); - _mi_memzero_aligned(_mi_page_map, reserve_size); - } - - uint8_t* sub0 = (uint8_t*)_mi_page_map + page_map_size; - uint8_t* sub1 = sub0 + MI_PAGE_MAP_SUB_SIZE; - // initialize the first part so NULL pointers get resolved without an access violation - _mi_page_map[0] = sub0; - sub0[0] = 1; // so _mi_ptr_page(NULL) == NULL - // and initialize the 4GiB range where we were allocated - _mi_page_map[_mi_page_map_index(_mi_page_map,NULL)] = sub1; - - mi_assert_internal(_mi_ptr_page(NULL)==NULL); - return true; -} - -static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* sub_idx, size_t* slice_count) { - size_t page_size; - *page_start = mi_page_area(page, &page_size); - if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE - MI_ARENA_SLICE_SIZE; } // furthest interior pointer - *slice_count = mi_slice_count_of_size(page_size) + (((uint8_t*)*page_start - (uint8_t*)page)/MI_ARENA_SLICE_SIZE); // add for large aligned blocks - return _mi_page_map_index(page,sub_idx); -} - - -static inline void mi_page_map_set_range(size_t idx, size_t sub_idx, size_t slice_count, uint8_t (*set)(uint8_t ofs)) { - // is the page map area that contains the page address committed? - uint8_t ofs = 1; - while (slice_count > 0) { - uint8_t* sub = _mi_page_map[idx]; - if (sub == NULL) { - mi_memid_t memid; - sub = (uint8_t*)_mi_os_alloc(MI_PAGE_MAP_SUB_SIZE, &memid); - uint8_t* expect = NULL; - if (!mi_atomic_cas_strong_acq_rel(((_Atomic(uint8_t*)*)&_mi_page_map[idx]), &expect, sub)) { - _mi_os_free(sub, MI_PAGE_MAP_SUB_SIZE, memid); - sub = expect; - mi_assert_internal(sub!=NULL); - } - if (sub == NULL) { - _mi_error_message(EFAULT, "internal error: unable to extend the page map\n"); - return; // abort? - } - } - // set the offsets for the page - while (sub_idx < MI_PAGE_MAP_SUB_SIZE && slice_count > 0) { - sub[sub_idx] = set(ofs); - sub_idx++; - ofs++; - slice_count--; - } - sub_idx = 0; // potentially wrap around to the next idx - } -} - -static uint8_t set_ofs(uint8_t ofs) { - return ofs; -} - -void _mi_page_map_register(mi_page_t* page) { - mi_assert_internal(page != NULL); - mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); - mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access! 
- if mi_unlikely(_mi_page_map == NULL) { - if (!_mi_page_map_init()) return; - } - mi_assert(_mi_page_map!=NULL); - uint8_t* page_start; - size_t slice_count; - size_t sub_idx; - const size_t idx = mi_page_map_get_idx(page, &page_start, &sub_idx, &slice_count); - mi_page_map_set_range(idx, sub_idx, slice_count, &set_ofs); -} - -static uint8_t set_zero(uint8_t ofs) { - MI_UNUSED(ofs); - return 0; -} - - -void _mi_page_map_unregister(mi_page_t* page) { - mi_assert_internal(_mi_page_map != NULL); - // get index and count - uint8_t* page_start; - size_t slice_count; - size_t sub_idx; - const size_t idx = mi_page_map_get_idx(page, &page_start, &sub_idx, &slice_count); - // unset the offsets - mi_page_map_set_range(idx, sub_idx, slice_count, &set_zero); -} - -void _mi_page_map_unregister_range(void* start, size_t size) { - const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE); - size_t sub_idx; - const size_t idx = _mi_page_map_index(start, &sub_idx); - mi_page_map_set_range(idx, sub_idx, slice_count, &set_zero); +mi_page_t* _mi_safe_ptr_page(const void* p) { + if mi_unlikely(p >= mi_page_map_max_address) return NULL; + const uintptr_t idx = _mi_page_map_index(p); + if mi_unlikely(mi_page_map_commit == NULL || !mi_bitmap_is_set(mi_page_map_commit, idx/MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT)) return NULL; + const uintptr_t ofs = _mi_page_map[idx]; + if mi_unlikely(ofs == 0) return NULL; + return (mi_page_t*)((((uintptr_t)p >> MI_ARENA_SLICE_SHIFT) - ofs + 1) << MI_ARENA_SLICE_SHIFT); } mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - - if mi_unlikely(p >= mi_page_map_max_address) return false; - size_t sub_idx; - const size_t idx = _mi_page_map_index(p, &sub_idx); - uint8_t* sub = _mi_page_map[idx]; - if (sub != NULL) { - return (sub[sub_idx] != 0); - } - else { - return false; - } + return (_mi_safe_ptr_page(p) != NULL); } - -#endif - From 8d16303aa6a6d25975f01569b71b7127a0a8d559 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 22 Dec 2024 12:21:31 -0800 Subject: [PATCH 5/8] add -mtune=native with opt arch --- CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ebd02b20..07a292e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,7 +91,7 @@ endif() if (CMAKE_GENERATOR MATCHES "^Visual Studio.*$") message(STATUS "Note: when building with Visual Studio the build type is specified when building.") - message(STATUS "For example: 'cmake --build . --config=Release") + message(STATUS "For example: 'cmake --build . 
--config=Release") endif() if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") @@ -401,9 +401,9 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel") endif() if(MI_OPT_ARCH) if(MI_ARCH STREQUAL "x64") - set(MI_OPT_ARCH_FLAGS "-march=haswell;-mavx2") # fast bit scan (since 2013) + set(MI_OPT_ARCH_FLAGS "-march=haswell;-mavx2;-mtune=native") # fast bit scan (since 2013) elseif(MI_ARCH STREQUAL "arm64") - set(MI_OPT_ARCH_FLAGS "-march=armv8.1-a") # fast atomics (since 2016) + set(MI_OPT_ARCH_FLAGS "-march=armv8.1-a;-mtune=native") # fast atomics (since 2016) endif() endif() endif() @@ -557,7 +557,7 @@ if(MI_BUILD_SHARED) elseif(MI_ARCH STREQUAL "x64") set(MIMALLOC_REDIRECT_SUFFIX "") if(CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64") - message(STATUS "Note: x64 code emulated on Windows for arm64 should use an arm64ec build of 'mimalloc-override.dll'") + message(STATUS "Note: x64 code emulated on Windows for arm64 should use an arm64ec build of 'mimalloc-override.dll'") message(STATUS " with 'mimalloc-redirect-arm64ec.dll'. See the 'bin\\readme.md' for more information.") endif() elseif(MI_ARCH STREQUAL "x86") @@ -681,7 +681,7 @@ endif() # ----------------------------------------------------------------------------- if (MI_OVERRIDE) if (MI_BUILD_SHARED) - target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE) + target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE) endif() if(NOT WIN32) # It is only possible to override malloc on Windows when building as a DLL. From 3c7d7e1f11eeca0dec9d48119ed22f40e63ae518 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 22 Dec 2024 14:07:57 -0800 Subject: [PATCH 6/8] experiment with 2 level pagemap --- include/mimalloc/bits.h | 18 ++++ include/mimalloc/internal.h | 43 +++++++++- src/page-map.c | 162 ++++++++++++++++++++++++++++++++++++ 3 files changed, 222 insertions(+), 1 deletion(-) diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h index 32b9d528..ca0b5905 100644 --- a/include/mimalloc/bits.h +++ b/include/mimalloc/bits.h @@ -107,6 +107,24 @@ typedef int32_t mi_ssize_t; // Define big endian if needed // #define MI_BIG_ENDIAN 1 +#if MI_DEFAULT_VIRTUAL_ADDRESS_BITS > 0 +#define MI_MAX_VABITS MI_DEFAULT_VIRTUAL_ADDRESS_BITS +#elif MI_ARCH_X64 +#define MI_MAX_VABITS (47) +#elif MI_INTPTR_SIZE > 4 +#define MI_MAX_VABITS (48) +#else +#define MI_MAX_VABITS (32) +#endif + +#ifndef MI_PAGE_MAP_FLAT +#if MI_MAX_VABITS <= 40 +#define MI_PAGE_MAP_FLAT 1 +#else +#define MI_PAGE_MAP_FLAT 0 +#endif +#endif + /* -------------------------------------------------------------------------------- Builtin's diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 17c02941..8955db5e 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -442,6 +442,8 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si Pages ----------------------------------------------------------- */ +#if MI_PAGE_MAP_FLAT + // flat page-map committed on demand extern uint8_t* _mi_page_map; @@ -466,10 +468,49 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) { return _mi_ptr_page_ex(p, NULL); } +#else + +// 2-level page map + +// one sub page-map = 64 KiB => covers 2^13 * 2^16 = 2^32 = 512 MiB address space +// the page-map needs 48-16-13 = 19 bits => 2^19 sub map pointers = 4 MiB size. +// we commit the page-map and the sub maps on-demand. 
+ +#define MI_PAGE_MAP_SUB_SHIFT (13) +#define MI_PAGE_MAP_SUB_COUNT (MI_ZU(1) << MI_PAGE_MAP_SUB_SHIFT) + +#define MI_PAGE_MAP_SHIFT (MI_MAX_VABITS - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT) +#define MI_PAGE_MAP_COUNT (MI_ZU(1) << MI_PAGE_MAP_SHIFT) + +extern mi_page_t*** _mi_page_map; + +static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) { + const uintptr_t u = (uintptr_t)p / MI_ARENA_SLICE_SIZE; + if (sub_idx != NULL) { *sub_idx = (uint32_t)u % MI_PAGE_MAP_SUB_COUNT; } + return (size_t)(u / MI_PAGE_MAP_SUB_COUNT); +} + +static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) { + size_t sub_idx; + const size_t idx = _mi_page_map_index(p, &sub_idx); + return _mi_page_map[idx][sub_idx]; +} + +static inline mi_page_t* _mi_checked_ptr_page(const void* p) { + size_t sub_idx; + const size_t idx = _mi_page_map_index(p, &sub_idx); + mi_page_t** const sub = _mi_page_map[idx]; + if mi_unlikely(sub == NULL) return NULL; + return sub[sub_idx]; +} + +#endif + + static inline mi_page_t* _mi_ptr_page(const void* p) { mi_assert_internal(p==NULL || mi_is_in_heap_region(p)); #if MI_DEBUG || defined(__APPLE__) - return _mi_checked_ptr_page(p); + return _mi_checked_ptr_page(p); #else return _mi_unchecked_ptr_page(p); #endif diff --git a/src/page-map.c b/src/page-map.c index a4001359..99a9b60a 100644 --- a/src/page-map.c +++ b/src/page-map.c @@ -9,6 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc/internal.h" #include "bitmap.h" +#if MI_PAGE_MAP_FLAT // The page-map contains a byte for each 64kb slice in the address space. // For an address `a` where `ofs = _mi_page_map[a >> 16]`: @@ -156,3 +157,164 @@ mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_att return (_mi_safe_ptr_page(p) != NULL); } +#else + +mi_decl_cache_align mi_page_t*** _mi_page_map; +static void* mi_page_map_max_address; +static mi_memid_t mi_page_map_memid; + +static _Atomic(mi_bfield_t) mi_page_map_commit; // one bit per committed 64 KiB entries + +static mi_page_t** mi_page_map_ensure_at(size_t idx); +static inline void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count); + +bool _mi_page_map_init(void) { + size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS); + if (vbits == 0) { + vbits = _mi_os_virtual_address_bits(); + #if MI_ARCH_X64 // canonical address is limited to the first 128 TiB + if (vbits >= 48) { vbits = 47; } + #endif + } + + // Allocate the page map and commit bits + mi_page_map_max_address = (void*)(MI_PU(1) << vbits); + const size_t page_map_count = (MI_ZU(1) << (vbits - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT)); + const size_t os_page_size = _mi_os_page_size(); + const size_t page_map_size = _mi_align_up( page_map_count * sizeof(mi_page_t**), os_page_size); + const size_t reserve_size = page_map_size + os_page_size; + const bool commit = page_map_size <= 64*MI_KiB || mi_option_is_enabled(mi_option_debug_commit_full_pagemap); // _mi_os_has_overcommit(); // commit on-access on Linux systems? 
+ _mi_page_map = (mi_page_t***)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid); + if (_mi_page_map==NULL) { + _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB); + return false; + } + if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) { + _mi_warning_message("internal: the page map was committed but not zero initialized!\n"); + _mi_memzero_aligned(_mi_page_map, page_map_size); + } + mi_atomic_store_release(&mi_page_map_commit, (commit ? ~0 : (mi_bfield_t)0)); + + // commit the first part so NULL pointers get resolved without an access violation + mi_page_map_ensure_at(0); + + // note: for the NULL range we only commit one OS page + // mi_page_map_set_range(NULL, 0, 0, 1); + _mi_page_map[0] = (mi_page_t**)((uint8_t*)_mi_page_map + page_map_size); + if (!mi_page_map_memid.initially_committed) { + _mi_os_commit(_mi_page_map[0], os_page_size, NULL); + } + _mi_page_map[0][0] = NULL; + + mi_assert_internal(_mi_ptr_page(NULL)==NULL); + return true; +} + +static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) { + mi_bfield_t commit = mi_atomic_load_relaxed(&mi_page_map_commit); + const size_t bit_idx = (idx*MI_INTPTR_SIZE)/MI_ARENA_SLICE_SIZE; // we commit a slice of entries at a time + mi_assert_internal(bit_idx < MI_BFIELD_BITS); + if (pbit_idx != NULL) { *pbit_idx = bit_idx; } + return ((commit & (MI_ZU(1) << bit_idx)) != 0); +} + +static mi_page_t** mi_page_map_ensure_committed(size_t idx) { + size_t bit_idx; + if mi_unlikely(!mi_page_map_is_committed(idx, &bit_idx)) { + uint8_t* start = (uint8_t*)_mi_page_map + (bit_idx * MI_ARENA_SLICE_SIZE); + _mi_os_commit(start, MI_ARENA_SLICE_SIZE, NULL); + mi_atomic_or_acq_rel(&mi_page_map_commit, MI_ZU(1) << bit_idx); + } + return _mi_page_map[idx]; +} + +static mi_page_t** mi_page_map_ensure_at(size_t idx) { + mi_page_t** sub = mi_page_map_ensure_committed(idx); + if mi_unlikely(sub == NULL) { + // sub map not yet allocated, alloc now + mi_memid_t memid; + sub = (mi_page_t**)_mi_os_alloc(MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*), &memid); + mi_page_t** expect = NULL; + if (!mi_atomic_cas_strong_acq_rel(((_Atomic(mi_page_t**)*)&_mi_page_map[idx]), &expect, sub)) { + // another thread already allocated it.. free and continue + _mi_os_free(sub, MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*), memid); + sub = expect; + mi_assert_internal(sub!=NULL); + } + if (sub == NULL) { + _mi_error_message(EFAULT, "internal error: unable to extend the page map\n"); + } + } + return sub; +} + +static void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) { + // is the page map area that contains the page address committed? 
+ while (slice_count > 0) { + mi_page_t** sub = mi_page_map_ensure_at(idx); + // set the offsets for the page + while (sub_idx < MI_PAGE_MAP_SUB_COUNT) { + sub[sub_idx] = page; + slice_count--; if (slice_count == 0) return; + sub_idx++; + } + idx++; // potentially wrap around to the next idx + sub_idx = 0; + } +} + +static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slice_count) { + size_t page_size; + uint8_t* page_start = mi_page_area(page, &page_size); + if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE - MI_ARENA_SLICE_SIZE; } // furthest interior pointer + *slice_count = mi_slice_count_of_size(page_size) + ((page_start - (uint8_t*)page)/MI_ARENA_SLICE_SIZE); // add for large aligned blocks + return _mi_page_map_index(page, sub_idx); +} + +void _mi_page_map_register(mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); + mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access! + if mi_unlikely(_mi_page_map == NULL) { + if (!_mi_page_map_init()) return; + } + mi_assert(_mi_page_map!=NULL); + size_t slice_count; + size_t sub_idx; + const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count); + mi_page_map_set_range(page, idx, sub_idx, slice_count); +} + +void _mi_page_map_unregister(mi_page_t* page) { + mi_assert_internal(_mi_page_map != NULL); + // get index and count + size_t slice_count; + size_t sub_idx; + const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count); + // unset the offsets + mi_page_map_set_range(page, idx, sub_idx, slice_count); +} + +void _mi_page_map_unregister_range(void* start, size_t size) { + const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE); + size_t sub_idx; + const uintptr_t idx = _mi_page_map_index(start, &sub_idx); + mi_page_map_set_range(NULL, idx, sub_idx, slice_count); // todo: avoid committing if not already committed? +} + + +mi_page_t* _mi_safe_ptr_page(const void* p) { + if mi_unlikely(p >= mi_page_map_max_address) return NULL; + size_t sub_idx; + const size_t idx = _mi_page_map_index(p,&sub_idx); + if mi_unlikely(!mi_page_map_is_committed(idx,NULL)) return NULL; + mi_page_t** const sub = _mi_page_map[idx]; + if mi_unlikely(sub==NULL) return NULL; + return sub[sub_idx]; +} + +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return (_mi_safe_ptr_page(p) != NULL); +} + +#endif From a42a2a926b5fd68a40bd7b75d1362d5c1f4e7d1b Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 22 Dec 2024 14:18:33 -0800 Subject: [PATCH 7/8] improving level 2 page-map --- include/mimalloc/internal.h | 11 ++++++----- src/page-map.c | 17 ++++++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 8955db5e..5dc2074d 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -470,11 +470,12 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) { #else -// 2-level page map - -// one sub page-map = 64 KiB => covers 2^13 * 2^16 = 2^32 = 512 MiB address space -// the page-map needs 48-16-13 = 19 bits => 2^19 sub map pointers = 4 MiB size. -// we commit the page-map and the sub maps on-demand. +// 2-level page map: +// The page-map is usually 4 MiB and points to sub maps of 64 KiB. 
+// The page-map is committed on-demand (in 64 KiB) parts (and sub-maps are committed on-demand as well) +// One sub page-map = 64 KiB => covers 2^13 * 2^16 = 2^32 = 512 MiB address space +// The page-map needs 48-16-13 = 19 bits => 2^19 sub map pointers = 4 MiB size. +// (Choosing a MI_PAGE_MAP_SUB_SHIFT of 16 gives slightly better code but will commit the initial sub-map at 512 KiB) #define MI_PAGE_MAP_SUB_SHIFT (13) #define MI_PAGE_MAP_SUB_COUNT (MI_ZU(1) << MI_PAGE_MAP_SUB_SHIFT) diff --git a/src/page-map.c b/src/page-map.c index 99a9b60a..5a25b839 100644 --- a/src/page-map.c +++ b/src/page-map.c @@ -159,11 +159,13 @@ mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_att #else +// A 2-level page map + mi_decl_cache_align mi_page_t*** _mi_page_map; static void* mi_page_map_max_address; static mi_memid_t mi_page_map_memid; -static _Atomic(mi_bfield_t) mi_page_map_commit; // one bit per committed 64 KiB entries +static _Atomic(mi_bfield_t) mi_page_map_commit; static mi_page_t** mi_page_map_ensure_at(size_t idx); static inline void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count); @@ -178,8 +180,10 @@ bool _mi_page_map_init(void) { } // Allocate the page map and commit bits + mi_assert(MI_MAX_VABITS >= vbits); mi_page_map_max_address = (void*)(MI_PU(1) << vbits); const size_t page_map_count = (MI_ZU(1) << (vbits - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT)); + mi_assert(page_map_count <= MI_PAGE_MAP_COUNT); const size_t os_page_size = _mi_os_page_size(); const size_t page_map_size = _mi_align_up( page_map_count * sizeof(mi_page_t**), os_page_size); const size_t reserve_size = page_map_size + os_page_size; @@ -193,7 +197,7 @@ bool _mi_page_map_init(void) { _mi_warning_message("internal: the page map was committed but not zero initialized!\n"); _mi_memzero_aligned(_mi_page_map, page_map_size); } - mi_atomic_store_release(&mi_page_map_commit, (commit ? ~0 : (mi_bfield_t)0)); + mi_atomic_store_release(&mi_page_map_commit, (commit ? 
~MI_ZU(0) : MI_ZU(0))); // commit the first part so NULL pointers get resolved without an access violation mi_page_map_ensure_at(0); @@ -210,9 +214,12 @@ bool _mi_page_map_init(void) { return true; } + +#define MI_PAGE_MAP_ENTRIES_PER_CBIT (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS) + static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) { mi_bfield_t commit = mi_atomic_load_relaxed(&mi_page_map_commit); - const size_t bit_idx = (idx*MI_INTPTR_SIZE)/MI_ARENA_SLICE_SIZE; // we commit a slice of entries at a time + const size_t bit_idx = idx/MI_PAGE_MAP_ENTRIES_PER_CBIT; mi_assert_internal(bit_idx < MI_BFIELD_BITS); if (pbit_idx != NULL) { *pbit_idx = bit_idx; } return ((commit & (MI_ZU(1) << bit_idx)) != 0); @@ -221,8 +228,8 @@ static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) { static mi_page_t** mi_page_map_ensure_committed(size_t idx) { size_t bit_idx; if mi_unlikely(!mi_page_map_is_committed(idx, &bit_idx)) { - uint8_t* start = (uint8_t*)_mi_page_map + (bit_idx * MI_ARENA_SLICE_SIZE); - _mi_os_commit(start, MI_ARENA_SLICE_SIZE, NULL); + uint8_t* start = (uint8_t*)&_mi_page_map[bit_idx * MI_PAGE_MAP_ENTRIES_PER_CBIT]; + _mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_page_t**), NULL); mi_atomic_or_acq_rel(&mi_page_map_commit, MI_ZU(1) << bit_idx); } return _mi_page_map[idx]; From c5cfc92f0cc8809d7fdd5e86c67321d90dd33a04 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 22 Dec 2024 14:39:57 -0800 Subject: [PATCH 8/8] small fixes --- include/mimalloc/bits.h | 2 ++ src/arena-meta.c | 2 +- src/page-map.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h index ca0b5905..ed4a7b44 100644 --- a/include/mimalloc/bits.h +++ b/include/mimalloc/bits.h @@ -107,6 +107,7 @@ typedef int32_t mi_ssize_t; // Define big endian if needed // #define MI_BIG_ENDIAN 1 +// maximum virtual address bits in a user-space pointer #if MI_DEFAULT_VIRTUAL_ADDRESS_BITS > 0 #define MI_MAX_VABITS MI_DEFAULT_VIRTUAL_ADDRESS_BITS #elif MI_ARCH_X64 @@ -117,6 +118,7 @@ typedef int32_t mi_ssize_t; #define MI_MAX_VABITS (32) #endif +// use a flat page-map (or a 2-level one) #ifndef MI_PAGE_MAP_FLAT #if MI_MAX_VABITS <= 40 #define MI_PAGE_MAP_FLAT 1 diff --git a/src/arena-meta.c b/src/arena-meta.c index 065a1331..fcfb680c 100644 --- a/src/arena-meta.c +++ b/src/arena-meta.c @@ -25,7 +25,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_META_PAGE_SIZE MI_ARENA_SLICE_SIZE #define MI_META_PAGE_ALIGN MI_ARENA_SLICE_ALIGN -#define MI_META_BLOCK_SIZE (64) +#define MI_META_BLOCK_SIZE (128) // large enough such that META_MAX_SIZE > 4k (even on 32-bit) #define MI_META_BLOCK_ALIGN MI_META_BLOCK_SIZE #define MI_META_BLOCKS_PER_PAGE (MI_ARENA_SLICE_SIZE / MI_META_BLOCK_SIZE) // 1024 #define MI_META_MAX_SIZE (MI_BCHUNK_SIZE * MI_META_BLOCK_SIZE) diff --git a/src/page-map.c b/src/page-map.c index 5a25b839..190be6c0 100644 --- a/src/page-map.c +++ b/src/page-map.c @@ -147,7 +147,7 @@ void _mi_page_map_unregister_range(void* start, size_t size) { mi_page_t* _mi_safe_ptr_page(const void* p) { if mi_unlikely(p >= mi_page_map_max_address) return NULL; const uintptr_t idx = _mi_page_map_index(p); - if mi_unlikely(mi_page_map_commit == NULL || !mi_bitmap_is_set(mi_page_map_commit, idx/MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT)) return NULL; + if mi_unlikely(mi_page_map_commit != NULL && !mi_bitmap_is_set(mi_page_map_commit, idx/MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT)) return NULL; const uintptr_t ofs = _mi_page_map[idx]; if mi_unlikely(ofs == 0) return NULL; return (mi_page_t*)((((uintptr_t)p >> MI_ARENA_SLICE_SHIFT) - ofs + 1) << MI_ARENA_SLICE_SHIFT);
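Taken together, PATCH 6-8 settle on a two-level map: a top-level array of sub-map pointers (about 4 MiB for a 48-bit address space), each sub-map a 64 KiB block holding one mi_page_t* per 64 KiB slice, with both levels populated on demand and racing threads resolving sub-map installation by compare-and-swap (the pattern PATCH 3 introduced for the byte-map variant). A minimal sketch of that shape, using portable C11 atomics and calloc in place of mimalloc's on-demand OS commit and bit-field tracking; every name and constant here is an illustrative assumption, not the library's code:

#include <stdint.h>
#include <stdlib.h>
#include <stdatomic.h>

#define SLICE_SHIFT  16                                  /* 64 KiB slices (assumed) */
#define SUB_SHIFT    13                                  /* 2^13 entries per sub-map = 64 KiB of pointers */
#define SUB_COUNT    ((size_t)1 << SUB_SHIFT)
#define MAP_COUNT    ((size_t)1 << (48 - SUB_SHIFT - SLICE_SHIFT))  /* 2^19 top-level entries */

typedef struct page_s page_t;                            /* stand-in for mi_page_t */
static _Atomic(page_t**) page_map[MAP_COUNT];            /* top level; sub-maps allocated on demand */

/* Two-level lookup: the top index picks a sub-map, the sub index picks the slice entry. */
static page_t* ptr_to_page(const void* p) {
  const uintptr_t u = (uintptr_t)p >> SLICE_SHIFT;
  page_t** sub = atomic_load_explicit(&page_map[u / SUB_COUNT], memory_order_acquire);
  return (sub == NULL ? NULL : sub[u % SUB_COUNT]);
}

/* Install a sub-map on demand; if another thread wins the race, free ours and use theirs. */
static page_t** ensure_sub_map(size_t idx) {
  page_t** sub = atomic_load_explicit(&page_map[idx], memory_order_acquire);
  if (sub != NULL) return sub;
  page_t** fresh = (page_t**)calloc(SUB_COUNT, sizeof(page_t*));  /* zeroed: entries start as NULL */
  if (fresh == NULL) return NULL;
  page_t** expect = NULL;
  if (atomic_compare_exchange_strong(&page_map[idx], &expect, fresh)) {
    return fresh;                                        /* we installed it */
  }
  free(fresh);                                           /* lost the race; adopt the winner's sub-map */
  return expect;
}

Registration would then walk the page's slice range, calling ensure_sub_map for each top-level index and storing the page pointer, much as mi_page_map_set_range does in PATCH 6; unregistration would store NULL over the same range. Keeping SUB_SHIFT at 13 makes each sub-map exactly 64 KiB of pointers, so it can be allocated and committed as a single unit, which is the trade-off the PATCH 7 comment describes.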