diff --git a/CMakeLists.txt b/CMakeLists.txt
index ebd02b20..07a292e0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -91,7 +91,7 @@ endif()
 
 if (CMAKE_GENERATOR MATCHES "^Visual Studio.*$")
   message(STATUS "Note: when building with Visual Studio the build type is specified when building.")
-  message(STATUS "For example: 'cmake --build . --config=Release")
+  message(STATUS "For example: 'cmake --build . --config=Release")
 endif()
 
 if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$")
@@ -401,9 +401,9 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel")
   endif()
   if(MI_OPT_ARCH)
     if(MI_ARCH STREQUAL "x64")
-      set(MI_OPT_ARCH_FLAGS "-march=haswell;-mavx2")   # fast bit scan (since 2013)
+      set(MI_OPT_ARCH_FLAGS "-march=haswell;-mavx2;-mtune=native")   # fast bit scan (since 2013)
     elseif(MI_ARCH STREQUAL "arm64")
-      set(MI_OPT_ARCH_FLAGS "-march=armv8.1-a")        # fast atomics (since 2016)
+      set(MI_OPT_ARCH_FLAGS "-march=armv8.1-a;-mtune=native")        # fast atomics (since 2016)
     endif()
   endif()
 endif()
@@ -557,7 +557,7 @@ if(MI_BUILD_SHARED)
   elseif(MI_ARCH STREQUAL "x64")
     set(MIMALLOC_REDIRECT_SUFFIX "")
     if(CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")
-      message(STATUS "Note: x64 code emulated on Windows for arm64 should use an arm64ec build of 'mimalloc-override.dll'")
+      message(STATUS "Note: x64 code emulated on Windows for arm64 should use an arm64ec build of 'mimalloc-override.dll'")
       message(STATUS "      with 'mimalloc-redirect-arm64ec.dll'. See the 'bin\\readme.md' for more information.")
     endif()
   elseif(MI_ARCH STREQUAL "x86")
@@ -681,7 +681,7 @@ endif()
 # -----------------------------------------------------------------------------
 if (MI_OVERRIDE)
   if (MI_BUILD_SHARED)
-    target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE)
+    target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE)
   endif()
   if(NOT WIN32)
     # It is only possible to override malloc on Windows when building as a DLL.
diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h
index 32b9d528..ed4a7b44 100644
--- a/include/mimalloc/bits.h
+++ b/include/mimalloc/bits.h
@@ -107,6 +107,26 @@ typedef int32_t  mi_ssize_t;
 // Define big endian if needed
 // #define MI_BIG_ENDIAN 1
 
+// maximum virtual address bits in a user-space pointer
+#if MI_DEFAULT_VIRTUAL_ADDRESS_BITS > 0
+#define MI_MAX_VABITS     MI_DEFAULT_VIRTUAL_ADDRESS_BITS
+#elif MI_ARCH_X64
+#define MI_MAX_VABITS     (47)
+#elif MI_INTPTR_SIZE > 4
+#define MI_MAX_VABITS     (48)
+#else
+#define MI_MAX_VABITS     (32)
+#endif
+
+// use a flat page-map (or a 2-level one)
+#ifndef MI_PAGE_MAP_FLAT
+#if MI_MAX_VABITS <= 40
+#define MI_PAGE_MAP_FLAT  1
+#else
+#define MI_PAGE_MAP_FLAT  0
+#endif
+#endif
+
 /* --------------------------------------------------------------------------------
   Builtin's
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 208989e3..5dc2074d 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -169,6 +169,7 @@ bool        _mi_page_map_init(void);
 void        _mi_page_map_register(mi_page_t* page);
 void        _mi_page_map_unregister(mi_page_t* page);
 void        _mi_page_map_unregister_range(void* start, size_t size);
+mi_page_t*  _mi_safe_ptr_page(const void* p);
 
 // "page.c"
 void*       _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment)  mi_attr_noexcept mi_attr_malloc;
@@ -422,6 +423,14 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
   return (heap != &_mi_heap_empty);
 }
 
+static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) {
+  mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE));
+  const size_t idx = _mi_wsize_from_size(size);
+  mi_assert_internal(idx < MI_PAGES_DIRECT);
+  return heap->pages_free_direct[idx];
+}
+
+
 //static inline uintptr_t _mi_ptr_cookie(const void* p) {
 //  extern mi_heap_t _mi_heap_main;
 //  mi_assert_internal(_mi_heap_main.cookie != 0);
@@ -433,48 +442,78 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
   Pages
 ----------------------------------------------------------- */
 
-static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) {
-  mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE));
-  const size_t idx = _mi_wsize_from_size(size);
-  mi_assert_internal(idx < MI_PAGES_DIRECT);
-  return heap->pages_free_direct[idx];
-}
-
+#if MI_PAGE_MAP_FLAT
+// flat page-map committed on demand
 extern uint8_t* _mi_page_map;
 
-static inline uintptr_t _mi_page_map_index(const void* p) {
-  return (((uintptr_t)p) >> MI_ARENA_SLICE_SHIFT);
+static inline size_t _mi_page_map_index(const void* p) {
+  return (size_t)((uintptr_t)p >> MI_ARENA_SLICE_SHIFT);
 }
 
 static inline mi_page_t* _mi_ptr_page_ex(const void* p, bool* valid) {
-  #if 1
-  const uintptr_t idx = _mi_page_map_index(p);
+  const size_t idx = _mi_page_map_index(p);
   const size_t ofs = _mi_page_map[idx];
-  if (valid != NULL) *valid = (ofs != 0);
-  return (mi_page_t*)((idx - ofs + 1) << MI_ARENA_SLICE_SHIFT);
-  #else
-  const uintptr_t idx = _mi_page_map_index(p);
-  const uintptr_t up = idx << MI_ARENA_SLICE_SHIFT;
-  __builtin_prefetch((void*)up);
-  const size_t ofs = _mi_page_map[idx];
-  if (valid != NULL) *valid = (ofs != 0);
-  return (mi_page_t*)(up - ((ofs - 1) << MI_ARENA_SLICE_SHIFT));
-  #endif
+  if (valid != NULL) { *valid = (ofs != 0); }
+  return (mi_page_t*)((((uintptr_t)p >> MI_ARENA_SLICE_SHIFT) + 1 - ofs) << MI_ARENA_SLICE_SHIFT);
 }
 
 static inline mi_page_t* _mi_checked_ptr_page(const void* p) {
   bool valid;
-  mi_page_t* const page = _mi_ptr_page_ex(p,&valid);
+  mi_page_t* const page = _mi_ptr_page_ex(p, &valid);
   return (valid ? page : NULL);
 }
 
+static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) {
+  return _mi_ptr_page_ex(p, NULL);
+}
+
+#else
+
+// 2-level page map:
+// The page-map is usually 4 MiB and points to sub maps of 64 KiB.
+// The page-map is committed on-demand (in 64 KiB parts) (and sub-maps are committed on-demand as well)
+// One sub page-map = 64 KiB => covers 2^13 * 2^16 = 2^29 = 512 MiB address space
+// The page-map needs 48-16-13 = 19 bits => 2^19 sub map pointers = 4 MiB size.
+// (Choosing a MI_PAGE_MAP_SUB_SHIFT of 16 gives slightly better code but will commit the initial sub-map at 512 KiB)
+
+#define MI_PAGE_MAP_SUB_SHIFT     (13)
+#define MI_PAGE_MAP_SUB_COUNT     (MI_ZU(1) << MI_PAGE_MAP_SUB_SHIFT)
+
+#define MI_PAGE_MAP_SHIFT         (MI_MAX_VABITS - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT)
+#define MI_PAGE_MAP_COUNT         (MI_ZU(1) << MI_PAGE_MAP_SHIFT)
+
+extern mi_page_t*** _mi_page_map;
+
+static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) {
+  const uintptr_t u = (uintptr_t)p / MI_ARENA_SLICE_SIZE;
+  if (sub_idx != NULL) { *sub_idx = (uint32_t)u % MI_PAGE_MAP_SUB_COUNT; }
+  return (size_t)(u / MI_PAGE_MAP_SUB_COUNT);
+}
+
+static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) {
+  size_t sub_idx;
+  const size_t idx = _mi_page_map_index(p, &sub_idx);
+  return _mi_page_map[idx][sub_idx];
+}
+
+static inline mi_page_t* _mi_checked_ptr_page(const void* p) {
+  size_t sub_idx;
+  const size_t idx = _mi_page_map_index(p, &sub_idx);
+  mi_page_t** const sub = _mi_page_map[idx];
+  if mi_unlikely(sub == NULL) return NULL;
+  return sub[sub_idx];
+}
+
+#endif
+
+
 static inline mi_page_t* _mi_ptr_page(const void* p) {
   mi_assert_internal(p==NULL || mi_is_in_heap_region(p));
   #if MI_DEBUG || defined(__APPLE__)
   return _mi_checked_ptr_page(p);
   #else
-  return _mi_ptr_page_ex(p,NULL);
+  return _mi_unchecked_ptr_page(p);
   #endif
 }
 
@@ -591,7 +630,7 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) {
   return (page->free != NULL);
 }
 
-
+
 // is the page not yet used up to its reserved space?
 static inline bool mi_page_is_expandable(const mi_page_t* page) {
   mi_assert_internal(page != NULL);
diff --git a/src/arena-meta.c b/src/arena-meta.c
index 4928a813..3bcb03eb 100644
--- a/src/arena-meta.c
+++ b/src/arena-meta.c
@@ -25,7 +25,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_META_PAGE_SIZE     MI_ARENA_SLICE_SIZE
 #define MI_META_PAGE_ALIGN    MI_ARENA_SLICE_ALIGN
 
-#define MI_META_BLOCK_SIZE    (64)
+#define MI_META_BLOCK_SIZE    (128)    // large enough such that META_MAX_SIZE > 4k (even on 32-bit)
 #define MI_META_BLOCK_ALIGN   MI_META_BLOCK_SIZE
 #define MI_META_BLOCKS_PER_PAGE  (MI_ARENA_SLICE_SIZE / MI_META_BLOCK_SIZE)  // 1024
 #define MI_META_MAX_SIZE      (MI_BCHUNK_SIZE * MI_META_BLOCK_SIZE)
diff --git a/src/free.c b/src/free.c
index 88f784c7..d08123a2 100644
--- a/src/free.c
+++ b/src/free.c
@@ -145,14 +145,14 @@ static inline mi_page_t* mi_checked_ptr_page(const void* p, const char* msg)
     _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
     return NULL;
   }
-  #endif
-  mi_page_t* const page = _mi_ptr_page(p);
-  #if MI_DEBUG
+  mi_page_t* const page = _mi_safe_ptr_page(p);
   if (page == NULL && p != NULL) {
     _mi_error_message(EINVAL, "%s: invalid pointer: %p\n", msg, p);
   }
-  #endif
   return page;
+  #else
+  return _mi_ptr_page(p);
+  #endif
 }
 
 // Free a block
diff --git a/src/heap.c b/src/heap.c
index 35c01941..a7e8e454 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -346,6 +346,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
   // mi_page_free(page,false);
   page->next = NULL;
   page->prev = NULL;
+  mi_page_set_heap(page, NULL);
   _mi_arena_page_free(page);
 
   return true; // keep going
@@ -513,7 +514,7 @@ bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena_id) {
 
   // reinit direct pages (as we may be in a different process)
   mi_assert_internal(heap->page_count == 0);
-  for (int i = 0; i < MI_PAGES_DIRECT; i++) {
+  for (size_t i = 0; i < MI_PAGES_DIRECT; i++) {
     heap->pages_free_direct[i] = (mi_page_t*)&_mi_page_empty;
   }
 
diff --git a/src/page-map.c b/src/page-map.c
index 64f4bbbb..190be6c0 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -9,46 +9,62 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "mimalloc/internal.h"
 #include "bitmap.h"
 
+#if MI_PAGE_MAP_FLAT
+
+// The page-map contains a byte for each 64 KiB slice in the address space.
+// For an address `a` where `ofs = _mi_page_map[a >> 16]`:
+//   0             = unused
+//   1             = the slice at `a & ~0xFFFF` is a mimalloc page.
+//   1 < ofs <= 127 = the slice is part of a page, starting at `(((a>>16) - (ofs - 1)) << 16)`.
+//
+// 1 byte per slice => a 1 TiB address space needs a 2^(40-16) = 16 MiB page map.
+// A full 256 TiB address space (48 bit) needs a 4 GiB page map.
+// A full 4 GiB address space (32 bit) needs only a 64 KiB page map.
+
 mi_decl_cache_align uint8_t* _mi_page_map = NULL;
-static bool  mi_page_map_all_committed = false;
-static size_t mi_page_map_entries_per_commit_bit = MI_ARENA_SLICE_SIZE;
-static void* mi_page_map_max_address = NULL;
-static mi_memid_t mi_page_map_memid;
+static void*        mi_page_map_max_address = NULL;
+static mi_memid_t   mi_page_map_memid;
 
+#define MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT  MI_ARENA_SLICE_SIZE
+static mi_bitmap_t* mi_page_map_commit;   // one bit per committed 64 KiB entries
 
-// (note: we need to initialize statically or otherwise C++ may run a default constructors after process initialization)
-static mi_bitmap_t mi_page_map_commit = { MI_ATOMIC_VAR_INIT(MI_BITMAP_DEFAULT_CHUNK_COUNT),
-                                          { 0 }, { {MI_ATOMIC_VAR_INIT(0)} }, {{{ MI_ATOMIC_VAR_INIT(0) }}} };
+static void mi_page_map_ensure_committed(size_t idx, size_t slice_count);
 
 bool _mi_page_map_init(void) {
-  size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS);
+  size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS);
   if (vbits == 0) {
     vbits = _mi_os_virtual_address_bits();
+    #if MI_ARCH_X64   // canonical address is limited to the first 128 TiB
     if (vbits >= 48) { vbits = 47; }
+    #endif
   }
-  // 1 byte per block = 2 GiB for 128 TiB address space (48 bit = 256 TiB address space)
-  //                    64 KiB for 4 GiB address space (on 32-bit)
+
+  // Allocate the page map and commit bits
   mi_page_map_max_address = (void*)(MI_PU(1) << vbits);
   const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_SLICE_SHIFT));
-
-  mi_page_map_entries_per_commit_bit = _mi_divide_up(page_map_size, MI_BITMAP_DEFAULT_BIT_COUNT);
-  // mi_bitmap_init(&mi_page_map_commit, MI_BITMAP_MIN_BIT_COUNT, true);
-
-  mi_page_map_all_committed = (page_map_size <= 1*MI_MiB || mi_option_is_enabled(mi_option_debug_commit_full_pagemap));  // _mi_os_has_overcommit();  // commit on-access on Linux systems?
-  _mi_page_map = (uint8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid);
-  if (_mi_page_map==NULL) {
+  const bool commit = (page_map_size <= 1*MI_MiB || mi_option_is_enabled(mi_option_debug_commit_full_pagemap));  // _mi_os_has_overcommit();  // commit on-access on Linux systems?
+  const size_t commit_bits = _mi_divide_up(page_map_size, MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT);
+  const size_t bitmap_size = (commit ? 0 : mi_bitmap_size(commit_bits, NULL));
+  const size_t reserve_size = bitmap_size + page_map_size;
+  uint8_t* const base = (uint8_t*)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid);
+  if (base==NULL) {
     _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
     return false;
   }
   if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) {
-    _mi_warning_message("the page map was committed but not zero initialized!\n");
-    _mi_memzero_aligned(_mi_page_map, page_map_size);
+    _mi_warning_message("internal: the page map was committed but not zero initialized!\n");
+    _mi_memzero_aligned(base, reserve_size);
   }
+  if (bitmap_size > 0) {
+    mi_page_map_commit = (mi_bitmap_t*)base;
+    _mi_os_commit(mi_page_map_commit, bitmap_size, NULL);
+    mi_bitmap_init(mi_page_map_commit, commit_bits, true);
+  }
+  _mi_page_map = base + bitmap_size;
+
   // commit the first part so NULL pointers get resolved without an access violation
-  if (!mi_page_map_all_committed) {
-    bool is_zero;
-    _mi_os_commit(_mi_page_map, _mi_os_page_size(), &is_zero);
-    if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(_mi_page_map, _mi_os_page_size()); }
+  if (!commit) {
+    mi_page_map_ensure_committed(0, 1);
   }
   _mi_page_map[0] = 1;   // so _mi_ptr_page(NULL) == NULL
   mi_assert_internal(_mi_ptr_page(NULL)==NULL);
@@ -56,30 +72,31 @@ bool _mi_page_map_init(void) {
 }
 
 static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
-  // is the page map area that contains the page address committed?
+  // is the page map area that contains the page address committed?
   // we always set the commit bits so we can track what ranges are in-use.
   // we only actually commit if the map wasn't committed fully already.
-  const size_t commit_bit_idx_lo = idx / mi_page_map_entries_per_commit_bit;
-  const size_t commit_bit_idx_hi = (idx + slice_count - 1) / mi_page_map_entries_per_commit_bit;
-  for (size_t i = commit_bit_idx_lo; i <= commit_bit_idx_hi; i++) {  // per bit to avoid crossing over bitmap chunks
-    if (mi_bitmap_is_clearN(&mi_page_map_commit, i, 1)) {
-      // this may race, in which case we do multiple commits (which is ok)
-      if (!mi_page_map_all_committed) {
+  if (mi_page_map_commit != NULL) {
+    const size_t commit_idx    = idx / MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT;
+    const size_t commit_idx_hi = (idx + slice_count - 1) / MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT;
+    for (size_t i = commit_idx; i <= commit_idx_hi; i++) {  // per bit to avoid crossing over bitmap chunks
+      if (mi_bitmap_is_clear(mi_page_map_commit, i)) {
+        // this may race, in which case we do multiple commits (which is ok)
         bool is_zero;
-        uint8_t* const start = _mi_page_map + (i*mi_page_map_entries_per_commit_bit);
-        const size_t size = mi_page_map_entries_per_commit_bit;
+        uint8_t* const start = _mi_page_map + (i * MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT);
+        const size_t size = MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT;
         _mi_os_commit(start, size, &is_zero);
-        if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start, size); }
+        if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start, size); }
+        mi_bitmap_set(mi_page_map_commit, i);
       }
-      mi_bitmap_set(&mi_page_map_commit, i);
     }
   }
   #if MI_DEBUG > 0
   _mi_page_map[idx] = 0;
   _mi_page_map[idx+slice_count-1] = 0;
-  #endif
+  #endif
 }
 
+
 static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* slice_count) {
   size_t page_size;
   *page_start = mi_page_area(page, &page_size);
@@ -88,11 +105,9 @@ static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t*
   return _mi_page_map_index(page);
 }
 
-
-
 void _mi_page_map_register(mi_page_t* page) {
   mi_assert_internal(page != NULL);
-  mi_assert_internal(_mi_is_aligned(page,MI_PAGE_ALIGN));
+  mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
   mi_assert_internal(_mi_page_map != NULL);  // should be initialized before multi-thread access!
   if mi_unlikely(_mi_page_map == NULL) {
     if (!_mi_page_map_init()) return;
   }
@@ -111,7 +126,6 @@ void _mi_page_map_register(mi_page_t* page) {
   }
 }
 
-
 void _mi_page_map_unregister(mi_page_t* page) {
   mi_assert_internal(_mi_page_map != NULL);
   // get index and count
@@ -129,16 +143,185 @@ void _mi_page_map_unregister_range(void* start, size_t size) {
   _mi_memzero(&_mi_page_map[index], slice_count);
 }
 
+
+mi_page_t* _mi_safe_ptr_page(const void* p) {
+  if mi_unlikely(p >= mi_page_map_max_address) return NULL;
+  const uintptr_t idx = _mi_page_map_index(p);
+  if mi_unlikely(mi_page_map_commit != NULL && !mi_bitmap_is_set(mi_page_map_commit, idx/MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT)) return NULL;
+  const uintptr_t ofs = _mi_page_map[idx];
+  if mi_unlikely(ofs == 0) return NULL;
+  return (mi_page_t*)((((uintptr_t)p >> MI_ARENA_SLICE_SHIFT) - ofs + 1) << MI_ARENA_SLICE_SHIFT);
+}
+
 mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
-  // if mi_unlikely(_mi_page_map==NULL) {  // happens on macOS during loading
-  //   _mi_page_map_init();
-  // }
-  if mi_unlikely(p >= mi_page_map_max_address) return false;
-  uintptr_t idx = ((uintptr_t)p >> MI_ARENA_SLICE_SHIFT);
-  if (mi_page_map_all_committed || mi_bitmap_is_setN(&mi_page_map_commit, idx/mi_page_map_entries_per_commit_bit, 1)) {
-    return (_mi_page_map[idx] != 0);
+  return (_mi_safe_ptr_page(p) != NULL);
+}
+
+#else
+
+// A 2-level page map
+
+mi_decl_cache_align mi_page_t*** _mi_page_map;
+static void*                     mi_page_map_max_address;
+static mi_memid_t                mi_page_map_memid;
+
+static _Atomic(mi_bfield_t)      mi_page_map_commit;
+
+static mi_page_t** mi_page_map_ensure_at(size_t idx);
+static inline void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count);
+
+bool _mi_page_map_init(void) {
+  size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS);
+  if (vbits == 0) {
+    vbits = _mi_os_virtual_address_bits();
+    #if MI_ARCH_X64   // canonical address is limited to the first 128 TiB
+    if (vbits >= 48) { vbits = 47; }
+    #endif
   }
-  else {
+
+  // Allocate the page map and commit bits
+  mi_assert(MI_MAX_VABITS >= vbits);
+  mi_page_map_max_address = (void*)(MI_PU(1) << vbits);
+  const size_t page_map_count = (MI_ZU(1) << (vbits - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT));
+  mi_assert(page_map_count <= MI_PAGE_MAP_COUNT);
+  const size_t os_page_size = _mi_os_page_size();
+  const size_t page_map_size = _mi_align_up( page_map_count * sizeof(mi_page_t**), os_page_size);
+  const size_t reserve_size = page_map_size + os_page_size;
+  const bool commit = page_map_size <= 64*MI_KiB || mi_option_is_enabled(mi_option_debug_commit_full_pagemap);  // _mi_os_has_overcommit();  // commit on-access on Linux systems?
+  _mi_page_map = (mi_page_t***)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid);
+  if (_mi_page_map==NULL) {
+    _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
     return false;
   }
+  if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) {
+    _mi_warning_message("internal: the page map was committed but not zero initialized!\n");
+    _mi_memzero_aligned(_mi_page_map, page_map_size);
+  }
+  mi_atomic_store_release(&mi_page_map_commit, (commit ? ~MI_ZU(0) : MI_ZU(0)));
+
+  // commit the first part so NULL pointers get resolved without an access violation
+  mi_page_map_ensure_at(0);
+
+  // note: for the NULL range we only commit one OS page
+  // mi_page_map_set_range(NULL, 0, 0, 1);
+  _mi_page_map[0] = (mi_page_t**)((uint8_t*)_mi_page_map + page_map_size);
+  if (!mi_page_map_memid.initially_committed) {
+    _mi_os_commit(_mi_page_map[0], os_page_size, NULL);
+  }
+  _mi_page_map[0][0] = NULL;
+
+  mi_assert_internal(_mi_ptr_page(NULL)==NULL);
+  return true;
 }
+
+
+#define MI_PAGE_MAP_ENTRIES_PER_CBIT  (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS)
+
+static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) {
+  mi_bfield_t commit = mi_atomic_load_relaxed(&mi_page_map_commit);
+  const size_t bit_idx = idx/MI_PAGE_MAP_ENTRIES_PER_CBIT;
+  mi_assert_internal(bit_idx < MI_BFIELD_BITS);
+  if (pbit_idx != NULL) { *pbit_idx = bit_idx; }
+  return ((commit & (MI_ZU(1) << bit_idx)) != 0);
+}
+
+static mi_page_t** mi_page_map_ensure_committed(size_t idx) {
+  size_t bit_idx;
+  if mi_unlikely(!mi_page_map_is_committed(idx, &bit_idx)) {
+    uint8_t* start = (uint8_t*)&_mi_page_map[bit_idx * MI_PAGE_MAP_ENTRIES_PER_CBIT];
+    _mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_page_t**), NULL);
+    mi_atomic_or_acq_rel(&mi_page_map_commit, MI_ZU(1) << bit_idx);
+  }
+  return _mi_page_map[idx];
+}
+
+static mi_page_t** mi_page_map_ensure_at(size_t idx) {
+  mi_page_t** sub = mi_page_map_ensure_committed(idx);
+  if mi_unlikely(sub == NULL) {
+    // sub map not yet allocated, alloc now
+    mi_memid_t memid;
+    sub = (mi_page_t**)_mi_os_alloc(MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*), &memid);
+    mi_page_t** expect = NULL;
+    if (!mi_atomic_cas_strong_acq_rel(((_Atomic(mi_page_t**)*)&_mi_page_map[idx]), &expect, sub)) {
+      // another thread already allocated it.. free and continue
+      _mi_os_free(sub, MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*), memid);
+      sub = expect;
+      mi_assert_internal(sub!=NULL);
+    }
+    if (sub == NULL) {
+      _mi_error_message(EFAULT, "internal error: unable to extend the page map\n");
+    }
+  }
+  return sub;
+}
+
+static void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
+  // is the page map area that contains the page address committed?
+  while (slice_count > 0) {
+    mi_page_t** sub = mi_page_map_ensure_at(idx);
+    // set the offsets for the page
+    while (sub_idx < MI_PAGE_MAP_SUB_COUNT) {
+      sub[sub_idx] = page;
+      slice_count--; if (slice_count == 0) return;
+      sub_idx++;
+    }
+    idx++;   // potentially wrap around to the next idx
+    sub_idx = 0;
+  }
+}
+
+static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slice_count) {
+  size_t page_size;
+  uint8_t* page_start = mi_page_area(page, &page_size);
+  if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE - MI_ARENA_SLICE_SIZE; }  // furthest interior pointer
+  *slice_count = mi_slice_count_of_size(page_size) + ((page_start - (uint8_t*)page)/MI_ARENA_SLICE_SIZE);  // add for large aligned blocks
+  return _mi_page_map_index(page, sub_idx);
+}
+
+void _mi_page_map_register(mi_page_t* page) {
+  mi_assert_internal(page != NULL);
+  mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
+  mi_assert_internal(_mi_page_map != NULL);  // should be initialized before multi-thread access!
+  if mi_unlikely(_mi_page_map == NULL) {
+    if (!_mi_page_map_init()) return;
+  }
+  mi_assert(_mi_page_map!=NULL);
+  size_t slice_count;
+  size_t sub_idx;
+  const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count);
+  mi_page_map_set_range(page, idx, sub_idx, slice_count);
+}
+
+void _mi_page_map_unregister(mi_page_t* page) {
+  mi_assert_internal(_mi_page_map != NULL);
+  // get index and count
+  size_t slice_count;
+  size_t sub_idx;
+  const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count);
+  // unset the offsets
+  mi_page_map_set_range(NULL, idx, sub_idx, slice_count);
+}
+
+void _mi_page_map_unregister_range(void* start, size_t size) {
+  const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE);
+  size_t sub_idx;
+  const uintptr_t idx = _mi_page_map_index(start, &sub_idx);
+  mi_page_map_set_range(NULL, idx, sub_idx, slice_count);  // todo: avoid committing if not already committed?
+}
+
+
+mi_page_t* _mi_safe_ptr_page(const void* p) {
+  if mi_unlikely(p >= mi_page_map_max_address) return NULL;
+  size_t sub_idx;
+  const size_t idx = _mi_page_map_index(p,&sub_idx);
+  if mi_unlikely(!mi_page_map_is_committed(idx,NULL)) return NULL;
+  mi_page_t** const sub = _mi_page_map[idx];
+  if mi_unlikely(sub==NULL) return NULL;
+  return sub[sub_idx];
+}
+
+mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
+  return (_mi_safe_ptr_page(p) != NULL);
+}
+
+#endif
diff --git a/test/test-stress.c b/test/test-stress.c
index 6933e6a2..e9c5214f 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -302,8 +302,8 @@ int main(int argc, char** argv) {
   mi_option_enable(mi_option_visit_abandoned);
   #endif
   #if !defined(NDEBUG) && !defined(USE_STD_MALLOC)
-  // mi_option_set(mi_option_arena_reserve, 32 * 1024 /* in kib = 32MiB */);
-  mi_option_set(mi_option_purge_delay,10);
+  mi_option_set(mi_option_arena_reserve, 32 * 1024 /* in kib = 32MiB */);
+  //mi_option_set(mi_option_purge_delay,10);
  #endif
   #if defined(NDEBUG) && !defined(USE_STD_MALLOC)
   // mi_option_set(mi_option_purge_delay,-1);
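
Reviewer note (illustrative sketch, not part of the patch): the flat page-map in src/page-map.c above keeps one offset byte per 64 KiB slice and recovers a page start as `(idx + 1 - ofs) << MI_ARENA_SLICE_SHIFT`. The stand-alone toy below re-creates that encoding with hypothetical names (`toy_register`, `toy_ptr_page`, a fixed 64 KiB slice size and a small map covering a 32-bit space) and no mimalloc internals.

    #include <stdint.h>
    #include <stddef.h>
    #include <assert.h>

    #define SLICE_SHIFT 16                 // 64 KiB slices, as in the patch
    static uint8_t page_map[1u << 16];     // toy flat map for a 4 GiB (32-bit) space

    // Mark a page that starts at `start` and spans `slice_count` slices:
    // the first slice gets offset 1, the next 2, ... (0 means "not a mimalloc slice").
    static void toy_register(uintptr_t start, size_t slice_count) {
      const size_t idx = (size_t)(start >> SLICE_SHIFT);
      for (size_t i = 0; i < slice_count; i++) {
        page_map[idx + i] = (uint8_t)(i + 1);
      }
    }

    // Map an interior pointer back to its page start (0 if unknown);
    // mirrors `(idx + 1 - ofs) << MI_ARENA_SLICE_SHIFT` in `_mi_ptr_page_ex`.
    static uintptr_t toy_ptr_page(uintptr_t p) {
      const size_t idx = (size_t)(p >> SLICE_SHIFT);
      const uint8_t ofs = page_map[idx];
      if (ofs == 0) return 0;
      return (uintptr_t)(idx + 1 - ofs) << SLICE_SHIFT;
    }

    int main(void) {
      toy_register(0x40000, 4);                                      // a 4-slice (256 KiB) page at 0x40000
      assert(toy_ptr_page(0x40000 + 3*0x10000 + 123) == 0x40000);    // interior pointer resolves to the page start
      assert(toy_ptr_page(0x200000) == 0);                           // unregistered address
      return 0;
    }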
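A second sketch (assumed constants, not the library's API) for the 2-level variant added in include/mimalloc/internal.h: with 64 KiB slices (shift 16), MI_PAGE_MAP_SUB_SHIFT = 13 and a 48-bit address space, the root needs 2^(48-16-13) = 2^19 pointers (4 MiB) and each 64 KiB sub-map covers 2^13 slices = 512 MiB of address space, matching the comment above. The index split looks like this:

    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>

    #define SLICE_SHIFT  16   // MI_ARENA_SLICE_SHIFT: 64 KiB slices
    #define SUB_SHIFT    13   // MI_PAGE_MAP_SUB_SHIFT
    #define VA_BITS      48   // assumed maximum user-space address bits

    #define SUB_COUNT    ((size_t)1 << SUB_SHIFT)                            // entries per sub-map
    #define ROOT_COUNT   ((size_t)1 << (VA_BITS - SUB_SHIFT - SLICE_SHIFT))  // root entries

    // Split an address into (root index, sub index), like _mi_page_map_index.
    static size_t toy_index(uintptr_t p, size_t* sub_idx) {
      const uintptr_t slice = p >> SLICE_SHIFT;
      *sub_idx = (size_t)(slice % SUB_COUNT);
      return (size_t)(slice / SUB_COUNT);
    }

    int main(void) {
      size_t sub_idx;
      const size_t idx = toy_index((uintptr_t)0x00007fff12345678ULL, &sub_idx);  // assumes a 64-bit build
      printf("root entries   : %zu (%zu MiB of pointers)\n",
             ROOT_COUNT, (ROOT_COUNT * sizeof(void*)) / (1024*1024));            // 524288 entries, 4 MiB
      printf("one sub-map    : %zu entries, covers %zu MiB\n",
             SUB_COUNT, ((size_t)SUB_COUNT << SLICE_SHIFT) / (1024*1024));       // 8192 entries, 512 MiB
      printf("example address: root %zu, sub %zu\n", idx, sub_idx);
      return 0;
    }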
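Finally, a sketch of the racy-but-safe publication pattern that mi_page_map_ensure_at uses for sub-maps: several threads may each allocate a candidate sub-map, a single compare-and-swap picks the winner, and losers free their copy and adopt the published one. The version below uses C11 atomics and calloc as stand-ins for the internal mi_atomic_cas_strong_acq_rel and _mi_os_alloc calls; like the patch, the only cost of losing the race is one extra allocation that is immediately freed.

    #include <stdatomic.h>
    #include <stdlib.h>

    #define SUB_COUNT ((size_t)1 << 13)   // entries per sub-map, as in the patch

    // One root slot; the real page map has 2^19 of these.
    static _Atomic(void**) root_slot;

    // Return the sub-map for this slot, allocating and publishing it on first use.
    static void** ensure_sub_map(void) {
      void** sub = atomic_load_explicit(&root_slot, memory_order_acquire);
      if (sub != NULL) return sub;                              // already published
      void** fresh = (void**)calloc(SUB_COUNT, sizeof(void*));  // zeroed candidate
      if (fresh == NULL) return NULL;
      void** expected = NULL;
      if (atomic_compare_exchange_strong_explicit(&root_slot, &expected, fresh,
                                                  memory_order_acq_rel,
                                                  memory_order_acquire)) {
        return fresh;                                           // we won the race and published it
      }
      free(fresh);                                              // someone else won; drop our copy
      return expected;                                          // adopt the published sub-map
    }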