From 657135de36edad2082323426aea3e2fa1a9cf19a Mon Sep 17 00:00:00 2001
From: daanx
Date: Mon, 23 Dec 2024 09:53:52 -0800
Subject: [PATCH 1/5] commit the 2-level page-map on over-commit systems

---
 CMakeLists.txt              | 18 +++++++++++-------
 include/mimalloc/internal.h | 26 ++++++++++++--------------
 src/options.c               |  2 +-
 src/page-map.c              |  3 ++-
 4 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 07a292e0..c184a0b3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,10 +10,9 @@ option(MI_PADDING "Enable padding to detect heap block overflow (alway
 option(MI_OVERRIDE "Override the standard malloc interface (i.e. define entry points for 'malloc', 'free', etc)" ON)
 option(MI_XMALLOC "Enable abort() call on memory allocation failure by default" OFF)
 option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF)
-option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF)
-option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF)
-option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF)
+option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
 option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF)
+
 option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for x64: '-march=haswell;-mavx2' (2013), for arm64: '-march=armv8.1-a' (2016))" ON)
 option(MI_OPT_SIMD "Use SIMD instructions (requires MI_OPT_ARCH to be enabled)" OFF)
 option(MI_SEE_ASM "Generate assembly files" OFF)
@@ -21,14 +20,19 @@ option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS"
 option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
 option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON)
 option(MI_LOCAL_DYNAMIC_TLS "Use local-dynamic-tls, a slightly slower but dlopen-compatible thread local storage mechanism (Unix)" OFF)
-option(MI_LIBC_MUSL "Set this when linking with musl libc" OFF)
+option(MI_LIBC_MUSL "Enable this when linking with musl libc" OFF)
+
+option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
+option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
+option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF)
+option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF)
+option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF)
+
 option(MI_BUILD_SHARED "Build shared library" ON)
 option(MI_BUILD_STATIC "Build static library" ON)
 option(MI_BUILD_OBJECT "Build object library" ON)
 option(MI_BUILD_TESTS "Build test executables" ON)
-option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
-option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
-option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
+
 option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF)
 option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc."
OFF) option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version" OFF) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e98a37f5..4cb54d6f 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -435,13 +435,14 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si /* ----------------------------------------------------------- - Pages + The page map maps addresses to `mi_page_t` pointers ----------------------------------------------------------- */ #if MI_PAGE_MAP_FLAT -// flat page-map committed on demand +// flat page-map committed on demand, using one byte per slice (64 KiB). // single indirection and low commit, but large initial virtual reserve (4 GiB with 48 bit virtual addresses) +// used by default on <= 40 bit virtual address spaces. extern uint8_t* _mi_page_map; static inline size_t _mi_page_map_index(const void* p) { @@ -468,26 +469,23 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) { #else // 2-level page map: -// double indirection but low commit and low virtual reserve. -// -// The page-map is usually 4 MiB and points to sub maps of 64 KiB. -// The page-map is committed on-demand (in 64 KiB) parts (and sub-maps are committed on-demand as well) -// One sub page-map = 64 KiB => covers 2^13 * 2^16 = 2^32 = 512 MiB address space -// The page-map needs 48-16-13 = 19 bits => 2^19 sub map pointers = 4 MiB size. -// (Choosing a MI_PAGE_MAP_SUB_SHIFT of 16 gives slightly better code but will commit the initial sub-map at 512 KiB) - +// double indirection, but low commit and low virtual reserve. +// +// the page-map is usually 4 MiB and points to sub maps of 64 KiB. +// the page-map is committed on-demand (in 64 KiB parts) (and sub-maps are committed on-demand as well) +// one sub page-map = 64 KiB => covers 2^(16-3) * 2^16 = 2^29 = 512 MiB address space +// the page-map needs 48-(16+13) = 19 bits => 2^19 sub map pointers = 4 MiB size. 
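Aside (not part of the patch): the arithmetic in the comment above can be checked with a small standalone C program. It assumes a 64-bit build with 48 address bits, 64 KiB arena slices, and the sub-map shift of 13 defined just below.

#include <stdint.h>
#include <stdio.h>

#define SLICE_SHIFT 16                              // one arena slice = 64 KiB
#define SUB_SHIFT   13                              // 2^13 page pointers per 64 KiB sub-map
#define SUB_COUNT   ((size_t)1 << SUB_SHIFT)
#define MAP_SHIFT   (48 - SLICE_SHIFT - SUB_SHIFT)  // 19 bits => 2^19 sub-map pointers = 4 MiB

int main(void) {
  const uintptr_t p = (uintptr_t)0x7f1234567890;    // an example address (64-bit build assumed)
  const size_t u   = (size_t)(p >> SLICE_SHIFT);    // slice number
  const size_t idx = u / SUB_COUNT;                 // index into the top-level map
  const size_t sub = u % SUB_COUNT;                 // index into the 64 KiB sub-map
  // one sub-map covers 2^13 slices * 64 KiB = 2^29 bytes = 512 MiB of address space
  printf("page = map[%zu][%zu] (top level: %zu pointers)\n", idx, sub, (size_t)1 << MAP_SHIFT);
  return 0;
}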
#define MI_PAGE_MAP_SUB_SHIFT (13) #define MI_PAGE_MAP_SUB_COUNT (MI_ZU(1) << MI_PAGE_MAP_SUB_SHIFT) - #define MI_PAGE_MAP_SHIFT (MI_MAX_VABITS - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT) #define MI_PAGE_MAP_COUNT (MI_ZU(1) << MI_PAGE_MAP_SHIFT) extern mi_page_t*** _mi_page_map; static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) { - const uintptr_t u = (uintptr_t)p / MI_ARENA_SLICE_SIZE; - if (sub_idx != NULL) { *sub_idx = (uint32_t)u % MI_PAGE_MAP_SUB_COUNT; } - return (size_t)(u / MI_PAGE_MAP_SUB_COUNT); + const size_t u = (size_t)((uintptr_t)p / MI_ARENA_SLICE_SIZE); + if (sub_idx != NULL) { *sub_idx = u % MI_PAGE_MAP_SUB_COUNT; } + return (u / MI_PAGE_MAP_SUB_COUNT); } static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) { diff --git a/src/options.c b/src/options.c index fc3a2838..7562cd46 100644 --- a/src/options.c +++ b/src/options.c @@ -103,7 +103,7 @@ typedef struct mi_option_desc_s { #endif #ifndef MI_DEFAULT_PAGEMAP_COMMIT -#if defined(__APPLE__) +#if defined(__APPLE__) // when overloading malloc, we still get mixed pointers sometimes on macOS; this avoids a bad access #define MI_DEFAULT_PAGEMAP_COMMIT 1 #else #define MI_DEFAULT_PAGEMAP_COMMIT 0 diff --git a/src/page-map.c b/src/page-map.c index 37ce3082..db14265b 100644 --- a/src/page-map.c +++ b/src/page-map.c @@ -187,7 +187,8 @@ bool _mi_page_map_init(void) { const size_t os_page_size = _mi_os_page_size(); const size_t page_map_size = _mi_align_up( page_map_count * sizeof(mi_page_t**), os_page_size); const size_t reserve_size = page_map_size + os_page_size; - const bool commit = page_map_size <= 64*MI_KiB || mi_option_is_enabled(mi_option_pagemap_commit); // _mi_os_has_overcommit(); // commit on-access on Linux systems? + const bool commit = page_map_size <= 64*MI_KiB || + mi_option_is_enabled(mi_option_pagemap_commit) || _mi_os_has_overcommit(); _mi_page_map = (mi_page_t***)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid); if (_mi_page_map==NULL) { _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB); From 88d8ee964f818b09ccd56c078b90851c78cd9af2 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 23 Dec 2024 15:04:06 -0800 Subject: [PATCH 2/5] remove is_large member (and use is_pinned for this) --- doc/mimalloc-doc.h | 7 +++---- include/mimalloc.h | 4 ++-- include/mimalloc/internal.h | 4 ++-- src/arena.c | 23 ++++++++++------------- 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index e1c14b44..e9da9b90 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -431,12 +431,11 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large); /// @param start Start of the memory area /// @param size The size of the memory area. /// @param is_committed Is the area already committed? -/// @param is_large Does it consist of large OS pages? Set this to \a true as well for memory -/// that should not be decommitted or protected (like rdma etc.) +/// @param is_pinned Can the memory not be decommitted or reset? (usually the case for large OS pages) /// @param is_zero Does the area consists of zero's? /// @param numa_node Possible associated numa node or `-1`. /// @return \a true if successful, and \a false on error. 
-bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node); +bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node); /// Reserve \a pages of huge OS pages (1GiB) evenly divided over \a numa_nodes nodes, /// but stops after at most `timeout_msecs` seconds. @@ -589,7 +588,7 @@ void mi_subproc_add_current_thread(mi_subproc_id_t subproc); /// Allocate \a size bytes aligned by \a alignment. /// @param size number of bytes to allocate. -/// @param alignment the minimal alignment of the allocated memory. +/// @param alignment the minimal alignment of the allocated memory. /// @returns pointer to the allocated memory or \a NULL if out of memory, /// or if the alignment is not a power of 2 (including 0). The \a size is unrestricted /// (and does not have to be an integral multiple of the \a alignment). diff --git a/include/mimalloc.h b/include/mimalloc.h index 8bff8923..508e6aec 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -274,7 +274,7 @@ mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; -mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_pinned /* cannot decommit/reset? */, bool is_zero, int numa_node) mi_attr_noexcept; mi_decl_export void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) mi_attr_noexcept; @@ -283,7 +283,7 @@ typedef void* mi_arena_id_t; mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; -mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; #if MI_MALLOC_VERSION >= 182 // Create a heap that only allocates in the specified arena diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 4cb54d6f..281f531a 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -143,8 +143,8 @@ mi_arena_id_t _mi_arena_id_none(void); mi_arena_t* _mi_arena_from_id(mi_arena_id_t id); bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_t* request_arena); -void* _mi_arenas_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); -void* _mi_arenas_alloc_aligned(mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); +void* _mi_arenas_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, 
mi_memid_t* memid); +void* _mi_arenas_alloc_aligned(mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); void _mi_arenas_free(void* p, size_t size, mi_memid_t memid); bool _mi_arenas_contain(const void* p); void _mi_arenas_collect(bool force_purge, mi_tld_t* tld); diff --git a/src/arena.c b/src/arena.c index 00ff3720..7b97fbbc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -41,7 +41,6 @@ typedef struct mi_arena_s { size_t info_slices; // initial slices reserved for the arena bitmaps int numa_node; // associated NUMA node bool is_exclusive; // only allow allocations if specifically for this arena - bool is_large; // memory area consists of large- or huge OS pages (always committed) _Atomic(mi_msecs_t) purge_expire; // expiration time when slices can be purged from `slices_purge`. mi_bitmap_t* slices_free; // is the slice free? @@ -333,8 +332,8 @@ static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_ Arena iteration ----------------------------------------------------------- */ -static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena, int numa_node, bool allow_large) { - if (!allow_large && arena->is_large) return false; +static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena, int numa_node, bool allow_pinned) { + if (!allow_pinned && arena->memid.is_pinned) return false; if (!mi_arena_id_is_suitable(arena, req_arena)) return false; if (req_arena == NULL) { // if not specific, check numa affinity const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node); @@ -1104,7 +1103,7 @@ static mi_bitmap_t* mi_arena_bitmap_init(size_t slice_count, uint8_t** base) { } -static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept +static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t size, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept { mi_assert(!is_large || (memid.initially_committed && memid.is_pinned)); mi_assert(_mi_is_aligned(start,MI_ARENA_SLICE_SIZE)); @@ -1154,8 +1153,7 @@ static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t s arena->is_exclusive = exclusive; arena->slice_count = slice_count; arena->info_slices = info_slices; - arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) - arena->is_large = is_large; + arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) arena->purge_expire = 0; // mi_lock_init(&arena->abandoned_visit_lock); @@ -1190,14 +1188,14 @@ static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t s } -bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { +bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); memid.mem.os.base = start; memid.mem.os.size = size; memid.initially_committed = is_committed; memid.initially_zero = is_zero; - memid.is_pinned = is_large; - return mi_manage_os_memory_ex2(_mi_subproc(), start, size, is_large, numa_node, exclusive, memid, arena_id); + memid.is_pinned = is_pinned; + return mi_manage_os_memory_ex2(_mi_subproc(), start, size, numa_node, exclusive, memid, arena_id); } // Reserve a range of regular OS memory @@ -1207,13 +1205,12 @@ static int mi_reserve_os_memory_ex2(mi_subproc_t* subproc, size_t size, bool com mi_memid_t memid; void* start = _mi_os_alloc_aligned(size, MI_ARENA_SLICE_ALIGN, commit, allow_large, &memid); if (start == NULL) return ENOMEM; - const bool is_large = memid.is_pinned; // todo: use separate is_large field? - if (!mi_manage_os_memory_ex2(subproc, start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { + if (!mi_manage_os_memory_ex2(subproc, start, size, -1 /* numa node */, exclusive, memid, arena_id)) { _mi_os_free_ex(start, size, commit, memid); _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024)); return ENOMEM; } - _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : ""); + _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), memid.is_pinned ? 
" (in large os pages)" : ""); // mi_debug_show_arenas(true, true, false); return 0; @@ -1373,7 +1370,7 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory_ex2(_mi_subproc(), p, hsize, true, numa_node, exclusive, memid, arena_id)) { + if (!mi_manage_os_memory_ex2(_mi_subproc(), p, hsize, numa_node, exclusive, memid, arena_id)) { _mi_os_free(p, hsize, memid); return ENOMEM; } From b515a0ad4c58f1e264213f22998c628470746bc1 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 23 Dec 2024 16:28:34 -0800 Subject: [PATCH 3/5] add _mi_os_guard_page_size --- include/mimalloc/internal.h | 8 ++++++ include/mimalloc/types.h | 7 +++-- src/arena-meta.c | 26 +++++++----------- src/arena.c | 38 +++++++++++--------------- src/os.c | 54 ++++++++++++++++++++++++++++++++++++- 5 files changed, 91 insertions(+), 42 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 281f531a..7c49d590 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -116,6 +116,7 @@ void _mi_os_free(void* p, size_t size, mi_memid_t memid); void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid); size_t _mi_os_page_size(void); +size_t _mi_os_guard_page_size(void); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); bool _mi_os_has_virtual_reserve(void); @@ -129,6 +130,13 @@ bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset); +size_t _mi_os_secure_guard_page_size(void); +bool _mi_os_secure_guard_page_set_at(void* addr, bool is_pinned); +bool _mi_os_secure_guard_page_set_before(void* addr, bool is_pinned); +bool _mi_os_secure_guard_page_reset_at(void* addr); +bool _mi_os_secure_guard_page_reset_before(void* addr); + + void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 84179458..c2ce4a26 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -46,8 +46,12 @@ terms of the MIT license. A copy of the license can be found in the file // Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). // #define MI_STAT 1 -// Define MI_SECURE to enable security mitigations. The lowest two have minimal performance impact: +// Define MI_SECURE to enable security mitigations. Level 1 has minimal performance impact, +// but protects most metadata with guard pages: // #define MI_SECURE 1 // guard page around metadata +// +// Level 2 has more performance impact but protect well against various buffer overflows +// by surrounding all mimalloc pages with guard pages: // #define MI_SECURE 2 // guard page around each mimalloc page (can fragment VMA's with large heaps..) // // The next two levels can have more performance cost: @@ -126,7 +130,6 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_MEDIUM_PAGE_SIZE (8*MI_SMALL_PAGE_SIZE) // 512 KiB (=byte in the bitmap) #define MI_LARGE_PAGE_SIZE (MI_SIZE_SIZE*MI_MEDIUM_PAGE_SIZE) // 4 MiB (=word in the bitmap) - // Maximum number of size classes. 
(spaced exponentially in 12.5% increments) #define MI_BIN_HUGE (73U) #define MI_BIN_FULL (MI_BIN_HUGE+1) diff --git a/src/arena-meta.c b/src/arena-meta.c index 34be6e0e..c8c0cac6 100644 --- a/src/arena-meta.c +++ b/src/arena-meta.c @@ -25,12 +25,6 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_META_PAGE_SIZE MI_ARENA_SLICE_SIZE #define MI_META_PAGE_ALIGN MI_ARENA_SLICE_ALIGN -#if MI_SECURE -#define MI_META_PAGE_GUARD_SIZE (4*MI_KiB) -#else -#define MI_META_PAGE_GUARD_SIZE (0) -#endif - #define MI_META_BLOCK_SIZE (128) // large enough such that META_MAX_SIZE > 4k (even on 32-bit) #define MI_META_BLOCK_ALIGN MI_META_BLOCK_SIZE #define MI_META_BLOCKS_PER_PAGE (MI_ARENA_SLICE_SIZE / MI_META_BLOCK_SIZE) // 1024 @@ -47,7 +41,7 @@ static mi_decl_cache_align _Atomic(mi_meta_page_t*) mi_meta_pages = MI_ATOMIC_V #if MI_DEBUG > 1 static mi_meta_page_t* mi_meta_page_of_ptr(void* p, size_t* block_idx) { - mi_meta_page_t* mpage = (mi_meta_page_t*)((uint8_t*)mi_align_down_ptr(p,MI_META_PAGE_ALIGN) + MI_META_PAGE_GUARD_SIZE); + mi_meta_page_t* mpage = (mi_meta_page_t*)((uint8_t*)mi_align_down_ptr(p,MI_META_PAGE_ALIGN) + _mi_os_secure_guard_page_size()); if (block_idx != NULL) { *block_idx = ((uint8_t*)p - (uint8_t*)mpage) / MI_META_BLOCK_SIZE; } @@ -60,9 +54,9 @@ static mi_meta_page_t* mi_meta_page_next( mi_meta_page_t* mpage ) { } static void* mi_meta_block_start( mi_meta_page_t* mpage, size_t block_idx ) { - mi_assert_internal(_mi_is_aligned((uint8_t*)mpage - MI_META_PAGE_GUARD_SIZE, MI_META_PAGE_ALIGN)); + mi_assert_internal(_mi_is_aligned((uint8_t*)mpage - _mi_os_secure_guard_page_size(), MI_META_PAGE_ALIGN)); mi_assert_internal(block_idx < MI_META_BLOCKS_PER_PAGE); - void* p = ((uint8_t*)mpage - MI_META_PAGE_GUARD_SIZE + (block_idx * MI_META_BLOCK_SIZE)); + void* p = ((uint8_t*)mpage - _mi_os_secure_guard_page_size() + (block_idx * MI_META_BLOCK_SIZE)); mi_assert_internal(mpage == mi_meta_page_of_ptr(p,NULL)); return p; } @@ -82,20 +76,18 @@ static mi_meta_page_t* mi_meta_page_zalloc(void) { } // guard pages - #if MI_SECURE - if (!memid.is_pinned) { - _mi_os_decommit(base, MI_META_PAGE_GUARD_SIZE); - _mi_os_decommit(base + MI_META_PAGE_SIZE - MI_META_PAGE_GUARD_SIZE, MI_META_PAGE_GUARD_SIZE); - } + #if MI_SECURE >= 1 + _mi_os_secure_guard_page_set_at(base, memid.is_pinned); + _mi_os_secure_guard_page_set_before(base + MI_META_PAGE_SIZE, memid.is_pinned); #endif - + // initialize the page and free block bitmap - mi_meta_page_t* mpage = (mi_meta_page_t*)(base + MI_META_PAGE_GUARD_SIZE); + mi_meta_page_t* mpage = (mi_meta_page_t*)(base + _mi_os_secure_guard_page_size()); mpage->memid = memid; mi_bitmap_init(&mpage->blocks_free, MI_META_BLOCKS_PER_PAGE, true /* already_zero */); const size_t mpage_size = offsetof(mi_meta_page_t,blocks_free) + mi_bitmap_size(MI_META_BLOCKS_PER_PAGE, NULL); const size_t info_blocks = _mi_divide_up(mpage_size,MI_META_BLOCK_SIZE); - const size_t guard_blocks = _mi_divide_up(MI_META_PAGE_GUARD_SIZE, MI_META_BLOCK_SIZE); + const size_t guard_blocks = _mi_divide_up(_mi_os_secure_guard_page_size(), MI_META_BLOCK_SIZE); mi_assert_internal(info_blocks + 2*guard_blocks < MI_META_BLOCKS_PER_PAGE); mi_bitmap_unsafe_setN(&mpage->blocks_free, info_blocks + guard_blocks, MI_META_BLOCKS_PER_PAGE - info_blocks - 2*guard_blocks); diff --git a/src/arena.c b/src/arena.c index 7b97fbbc..3349abb1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -576,12 +576,6 @@ static mi_page_t* mi_arenas_page_try_find_abandoned(mi_subproc_t* subproc, size_ return NULL; } 
-#if MI_SECURE < 2 -#define MI_ARENA_GUARD_PAGE_SIZE (0) -#else -#define MI_ARENA_GUARD_PAGE_SIZE (4*MI_KiB) -#endif - // Allocate a fresh page static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice_count, size_t block_size, size_t block_alignment, mi_arena_t* req_arena, size_t tseq) @@ -621,11 +615,14 @@ static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); mi_assert_internal(!os_align || _mi_is_aligned((uint8_t*)page + page_alignment, block_alignment)); - // guard page at the end - const size_t page_noguard_size = mi_size_of_slices(slice_count) - MI_ARENA_GUARD_PAGE_SIZE; - #if MI_SECURE >= 2 - if (memid.initially_committed && !memid.is_pinned) { - _mi_os_decommit((uint8_t*)page + page_noguard_size, MI_ARENA_GUARD_PAGE_SIZE); + // guard page at the end of mimalloc page? + #if MI_SECURE < 2 + const size_t page_noguard_size = mi_size_of_slices(slice_count); + #else + mi_assert(mi_size_of_slices(slice_count) > _mi_os_secure_guard_page_size()); + const size_t page_noguard_size = mi_size_of_slices(slice_count) - _mi_os_secure_guard_page_size(); + if (memid.initially_committed) { + _mi_os_secure_guard_page_set_at((uint8_t*)page + page_noguard_size, memid.is_pinned); } #endif @@ -795,7 +792,7 @@ void _mi_arenas_page_free(mi_page_t* page) { // we must do this since we may later allocate large spans over this page and cannot have a guard page in between #if MI_SECURE >= 2 if (!page->memid.is_pinned) { - _mi_os_commit((uint8_t*)page + mi_memid_size(page->memid) - MI_ARENA_GUARD_PAGE_SIZE, MI_ARENA_GUARD_PAGE_SIZE, NULL); + _mi_os_secure_guard_page_reset_before((uint8_t*)page + mi_memid_size(page->memid)); } #endif @@ -1089,7 +1086,7 @@ static size_t mi_arena_info_slices_needed(size_t slice_count, size_t* bitmap_bas const size_t size = base_size + bitmaps_size; const size_t os_page_size = _mi_os_page_size(); - const size_t info_size = _mi_align_up(size, os_page_size) + MI_ARENA_GUARD_PAGE_SIZE; + const size_t info_size = _mi_align_up(size, os_page_size) + _mi_os_secure_guard_page_size(); const size_t info_slices = mi_slice_count_of_size(info_size); if (bitmap_base != NULL) *bitmap_base = base_size; @@ -1105,7 +1102,6 @@ static mi_bitmap_t* mi_arena_bitmap_init(size_t slice_count, uint8_t** base) { static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t size, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept { - mi_assert(!is_large || (memid.initially_committed && memid.is_pinned)); mi_assert(_mi_is_aligned(start,MI_ARENA_SLICE_SIZE)); mi_assert(start!=NULL); if (start==NULL) return false; @@ -1134,17 +1130,15 @@ static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t s // commit & zero if needed if (!memid.initially_committed) { - // if MI_SECURE, leave a guard OS page decommitted at the end - _mi_os_commit(arena, mi_size_of_slices(info_slices) - MI_ARENA_GUARD_PAGE_SIZE, NULL); + // leave a guard OS page decommitted at the end + _mi_os_commit(arena, mi_size_of_slices(info_slices) - _mi_os_secure_guard_page_size(), NULL); } - else if (!memid.is_pinned) { - #if MI_SECURE > 0 - // if MI_SECURE, decommit a guard OS page at the end of the arena info - _mi_os_decommit((uint8_t*)arena + mi_size_of_slices(info_slices) - MI_ARENA_GUARD_PAGE_SIZE, MI_ARENA_GUARD_PAGE_SIZE); - #endif + else { + // if MI_SECURE, set a guard page at the end + _mi_os_secure_guard_page_set_before((uint8_t*)arena + 
mi_size_of_slices(info_slices), memid.is_pinned);
   }
   if (!memid.initially_zero) {
-    _mi_memzero(arena, mi_size_of_slices(info_slices) - MI_ARENA_GUARD_PAGE_SIZE);
+    _mi_memzero(arena, mi_size_of_slices(info_slices) - _mi_os_secure_guard_page_size());
   }
 
   // init
diff --git a/src/os.c b/src/os.c
index 80d44d12..399aac6c 100644
--- a/src/os.c
+++ b/src/os.c
@@ -61,8 +61,16 @@ size_t _mi_os_large_page_size(void) {
   return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
 }
 
+size_t _mi_os_guard_page_size(void) {
+  const size_t gsize = _mi_os_page_size();
+  mi_assert(gsize <= (MI_ARENA_SLICE_SIZE/8));
+  return gsize;
+}
+
 size_t _mi_os_virtual_address_bits(void) {
-  return mi_os_mem_config.virtual_address_bits;
+  const size_t vbits = mi_os_mem_config.virtual_address_bits;
+  mi_assert(vbits <= MI_MAX_VABITS);
+  return vbits;
 }
 
 bool _mi_os_use_large_page(size_t size, size_t alignment) {
@@ -99,6 +107,50 @@ void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
   return NULL;
 }
 
+// In secure mode, return the size of a guard page, otherwise 0
+size_t _mi_os_secure_guard_page_size(void) {
+  #if MI_SECURE > 0
+  return _mi_os_guard_page_size();
+  #else
+  return 0;
+  #endif
+}
+
+// In secure mode, try to decommit an area and output a warning if this fails.
+bool _mi_os_secure_guard_page_set_at(void* addr, bool is_pinned) {
+  if (addr == NULL) return true;
+  #if MI_SECURE > 0
+  const bool ok = (is_pinned ? false : _mi_os_decommit(addr, _mi_os_secure_guard_page_size()));
+  if (!ok) {
+    _mi_error_message(EINVAL, "secure level %d, but failed to set a guard page (at %p of size %zu)\n", MI_SECURE, addr, _mi_os_secure_guard_page_size());
+  }
+  return ok;
+  #else
+  MI_UNUSED(is_pinned);
+  return true;
+  #endif
+}
+
+// In secure mode, set a guard page just before the given address (and output a warning if this fails).
+bool _mi_os_secure_guard_page_set_before(void* addr, bool is_pinned) { + return _mi_os_secure_guard_page_set_at((uint8_t*)addr - _mi_os_secure_guard_page_size(), is_pinned); +} + +// In secure mode, try to recommit an area +bool _mi_os_secure_guard_page_reset_at(void* addr) { + if (addr == NULL) return true; + #if MI_SECURE > 0 + return _mi_os_commit(addr, _mi_os_secure_guard_page_size(), NULL); + #else + return true; + #endif +} + +// In secure mode, try to recommit an area +bool _mi_os_secure_guard_page_reset_before(void* addr) { + return _mi_os_secure_guard_page_reset_at((uint8_t*)addr - _mi_os_secure_guard_page_size()); +} + /* ----------------------------------------------------------- Free memory From c65c6d83bd0a1c3d00bcbe8ce4fc1bc10ddc947e Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 23 Dec 2024 16:31:42 -0800 Subject: [PATCH 4/5] fix guard page size --- ide/vs2022/mimalloc.vcxproj | 2 +- src/arena.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 87e866bb..63bc7d1d 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -190,7 +190,7 @@ true Default ../../include - MI_DEBUG=3;MI_GUARDED=0;%(PreprocessorDefinitions); + MI_DEBUG=3;MI_GUARDED=0;MI_SECURE=4;%(PreprocessorDefinitions); CompileAsCpp false stdcpp20 diff --git a/src/arena.c b/src/arena.c index 3349abb1..9ae44d85 100644 --- a/src/arena.c +++ b/src/arena.c @@ -720,10 +720,10 @@ static mi_page_t* mi_arenas_page_singleton_alloc(mi_heap_t* heap, size_t block_s mi_tld_t* const tld = heap->tld; const bool os_align = (block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN); const size_t info_size = (os_align ? MI_PAGE_ALIGN : mi_page_info_size()); - #if MI_ARENA_GUARD_PAGE_SIZE == 0 + #if MI_SECURE < 2 const size_t slice_count = mi_slice_count_of_size(info_size + block_size); #else - const size_t slice_count = mi_slice_count_of_size(_mi_align_up(info_size + block_size, MI_ARENA_GUARD_PAGE_SIZE) + MI_ARENA_GUARD_PAGE_SIZE); + const size_t slice_count = mi_slice_count_of_size(_mi_align_up(info_size + block_size, _mi_os_secure_guard_page_size()) + _mi_os_secure_guard_page_size()); #endif mi_page_t* page = mi_arenas_page_alloc_fresh(tld->subproc, slice_count, block_size, block_alignment, req_arena, tld->thread_seq); From 9bad269c518a4104ac13584bc9474e0e357efd1c Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 23 Dec 2024 16:47:01 -0800 Subject: [PATCH 5/5] fix purge delay check for arenas --- src/arena.c | 2 +- src/options.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 9ae44d85..af0d1d0a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1551,7 +1551,7 @@ static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld) mi_subproc_t* subproc = tld->subproc; const mi_msecs_t now = _mi_clock_now(); mi_msecs_t arenas_expire = mi_atomic_load_acquire(&subproc->purge_expire); - if (!force && (arenas_expire == 0 || arenas_expire < now)) return; + if (!force && (arenas_expire == 0 || arenas_expire > now)) return; const size_t max_arena = mi_arenas_get_count(subproc); if (max_arena == 0) return; diff --git a/src/options.c b/src/options.c index 7562cd46..63d8a68f 100644 --- a/src/options.c +++ b/src/options.c @@ -144,7 +144,7 @@ static mi_option_desc_t options[_mi_option_last] = #else { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif - { 1000,UNINIT, 
MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds + { 500, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 0, UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
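Three short example sketches follow; they are illustrative additions, not part of the patch series. First, the public API change of patch 2: externally reserved memory is now handed to mimalloc with an is_pinned flag instead of is_large. A usage sketch, assuming Linux mmap and a hypothetical 4 MiB ALIGN chosen to satisfy the MI_ARENA_SLICE_SIZE alignment that mi_manage_os_memory_ex2 asserts:

#include <mimalloc.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/mman.h>

#define ALIGN ((size_t)(4*1024*1024))   // assumed to satisfy mimalloc's slice alignment

int main(void) {
  const size_t size = (size_t)64*1024*1024;
  // over-allocate so a suitably aligned region can be handed to mimalloc
  void* raw = mmap(NULL, size + ALIGN, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (raw == MAP_FAILED) return 1;
  void* base = (void*)(((uintptr_t)raw + ALIGN - 1) & ~(uintptr_t)(ALIGN - 1));
  // committed, not pinned (mimalloc may decommit/reset parts of it),
  // anonymous mmap memory is zero-initialized, no NUMA preference (-1)
  return mi_manage_os_memory(base, size, true, false, true, -1) ? 0 : 1;
}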
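Second, the semantics of the guard-page helpers added in patch 3: a guard is created by decommitting one OS page (so any later access faults), no guard can be placed in pinned memory (large or huge OS pages cannot be decommitted), and a reset recommits the page so the range can later be reused for large spans. A reduced model; os_decommit, os_commit, and GUARD_SIZE are hypothetical stand-ins for the real OS primitives:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define GUARD_SIZE 4096   // assume one 4 KiB OS page, as _mi_os_guard_page_size() returns

// hypothetical stand-ins for _mi_os_decommit / _mi_os_commit
static bool os_decommit(void* addr, size_t size) { (void)addr; (void)size; return true; }
static bool os_commit  (void* addr, size_t size) { (void)addr; (void)size; return true; }

// like _mi_os_secure_guard_page_set_at: pinned memory cannot be decommitted,
// so no guard page can be placed in it
static bool guard_set_at(void* addr, bool is_pinned) {
  if (addr == NULL) return true;
  if (is_pinned) return false;            // cannot punch a hole in pinned memory
  return os_decommit(addr, GUARD_SIZE);   // the inaccessible page is the guard
}

// like _mi_os_secure_guard_page_set_before: the guard occupies the bytes just below addr
static bool guard_set_before(void* addr, bool is_pinned) {
  return guard_set_at((uint8_t*)addr - GUARD_SIZE, is_pinned);
}

// like _mi_os_secure_guard_page_reset_at: recommit so the page is usable again
static bool guard_reset_at(void* addr) {
  if (addr == NULL) return true;
  return os_commit(addr, GUARD_SIZE);
}

int main(void) {
  static uint8_t area[2*GUARD_SIZE];      // stand-in region; real code uses OS-allocated memory
  if (!guard_set_before(area + GUARD_SIZE, false)) return 1;
  return guard_reset_at(area) ? 0 : 1;
}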
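Third, the comparison fixed in patch 5: with the old arenas_expire < now, the early return fired exactly when the purge delay had already elapsed, so a non-forced purge could never run; the corrected arenas_expire > now returns early only while the expiration still lies in the future. The intended predicate, reduced to its core (names are illustrative):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef int64_t msecs_t;

// should a purge run now? (reduced model of the check in mi_arenas_try_purge)
static bool purge_due(msecs_t expire, msecs_t now, bool force) {
  if (force) return true;
  if (expire == 0) return false;   // no purge scheduled
  return now >= expire;            // due once the delay has elapsed
}

int main(void) {
  assert(!purge_due(1000, 999, false));   // still within the purge delay: skip
  assert( purge_due(1000, 1000, false));  // delay elapsed: purge
  assert( purge_due(0, 0, true));         // force purges regardless
  return 0;
}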