From a527f751619303c89c8967f64ed75015839d0a56 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 22 Apr 2024 10:10:20 -0700 Subject: [PATCH 1/3] add disallow_arena_alloc option --- include/mimalloc.h | 52 ++++++++++++++++++++++++---------------------- src/arena.c | 30 +++++++++++++------------- src/options.c | 7 ++++--- 3 files changed, 47 insertions(+), 42 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index b3f60a34..9848d531 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -317,41 +317,43 @@ mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size typedef enum mi_option_e { // stable options - mi_option_show_errors, // print error messages - mi_option_show_stats, // print statistics on termination - mi_option_verbose, // print verbose messages - // the following options are experimental (see src/options.h) - mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1) - mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2) - mi_option_purge_decommits, // should a memory purge decommit (or only reset) (=1) - mi_option_allow_large_os_pages, // allow large (2MiB) OS pages, implies eager commit - mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB/page) at startup - mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node - mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup + mi_option_show_errors, // print error messages + mi_option_show_stats, // print statistics on termination + mi_option_verbose, // print verbose messages + // advanced options + mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1) + mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2) + mi_option_purge_decommits, // should a memory purge decommit? (=1). 
Set to 0 to use memory reset on a purge (instead of decommit) + mi_option_allow_large_os_pages, // allow large (2 or 4 MiB) OS pages, implies eager commit. If false, also disables THP for the process. + mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB pages) at startup + mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node + mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup mi_option_deprecated_segment_cache, mi_option_deprecated_page_reset, - mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination + mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination mi_option_deprecated_segment_reset, - mi_option_eager_commit_delay, - mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. - mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes. - mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas) - mi_option_os_tag, // tag used for OS logging (macOS only for now) - mi_option_max_errors, // issue at most N error messages - mi_option_max_warnings, // issue at most N warning messages - mi_option_max_segment_reclaim, - mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe. - mi_option_arena_reserve, // initial memory size in KiB for arena reservation (1GiB on 64-bit) - mi_option_arena_purge_mult, + mi_option_eager_commit_delay, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) + mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. (=10) + mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes. 
+ mi_option_disallow_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas) + mi_option_os_tag, // tag used for OS logging (macOS only for now) (=100) + mi_option_max_errors, // issue at most N error messages + mi_option_max_warnings, // issue at most N warning messages + mi_option_max_segment_reclaim, // max. percentage of the abandoned segments can be reclaimed per try (=10%) + mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe + mi_option_arena_reserve, // initial memory size in KiB for arena reservation (= 1 GiB on 64-bit) + mi_option_arena_purge_mult, // multiplier for `purge_delay` for the purging delay for arenas (=10) mi_option_purge_extend_delay, - mi_option_abandoned_reclaim_on_free, // reclaim abandoned segments on a free + mi_option_abandoned_reclaim_on_free, // allow to reclaim an abandoned segment on a free (=1) + mi_option_disallow_arena_alloc, // 1 = do not use arena's for allocation (except if using specific arena id's) _mi_option_last, // legacy option names mi_option_large_os_pages = mi_option_allow_large_os_pages, mi_option_eager_region_commit = mi_option_arena_eager_commit, mi_option_reset_decommits = mi_option_purge_decommits, mi_option_reset_delay = mi_option_purge_delay, - mi_option_abandoned_page_reset = mi_option_abandoned_page_purge + mi_option_abandoned_page_reset = mi_option_abandoned_page_purge, + mi_option_limit_os_alloc = mi_option_disallow_os_alloc } mi_option_t; diff --git a/src/arena.c b/src/arena.c index fccab871..511fe2fd 100644 --- a/src/arena.c +++ b/src/arena.c @@ -353,7 +353,7 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re if (arena_reserve == 0) return false; if (!_mi_os_has_virtual_reserve()) { - arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for some embedded systems for example) + arena_reserve = arena_reserve/4; // be conservative 
if virtual reserve is not supported (for WASM for example) } arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); if (arena_count >= 8 && arena_count <= 128) { @@ -366,7 +366,7 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); } else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } - return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive */, arena_id) == 0); + return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id) == 0); } @@ -380,24 +380,26 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset const int numa_node = _mi_os_numa_node(tld); // current numa node // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) - if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { - void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); - if (p != NULL) return p; + if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) || req_arena_id != _mi_arena_id_none()) { // is arena allocation allowed? 
+ if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { + void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; - // otherwise, try to first eagerly reserve a new arena - if (req_arena_id == _mi_arena_id_none()) { - mi_arena_id_t arena_id = 0; - if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) { - // and try allocate in there - mi_assert_internal(req_arena_id == _mi_arena_id_none()); - p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); - if (p != NULL) return p; + // otherwise, try to first eagerly reserve a new arena + if (req_arena_id == _mi_arena_id_none()) { + mi_arena_id_t arena_id = 0; + if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) { + // and try allocate in there + mi_assert_internal(req_arena_id == _mi_arena_id_none()); + p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; + } } } } // if we cannot use OS allocation, return NULL - if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) { + if (mi_option_is_enabled(mi_option_disallow_os_alloc) || req_arena_id != _mi_arena_id_none()) { errno = ENOMEM; return NULL; } diff --git a/src/options.c b/src/options.c index f8e928d0..78e9377c 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages {-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N - { 0, UNINIT, MI_OPTION(reserve_os_memory) }, + { 0, UNINIT, MI_OPTION(reserve_os_memory) }, // reserve OS 
memory in advance { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(abandoned_page_purge) }, // purge free page memory when a thread terminates @@ -77,7 +77,7 @@ static mi_option_desc_t options[_mi_option_last] = #endif { 10, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. - { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) + { 0, UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output @@ -91,7 +91,8 @@ static mi_option_desc_t options[_mi_option_last] = { 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, - { 1, UNINIT, MI_OPTION(abandoned_reclaim_on_free) }, // reclaim an abandoned segment on a free + { 1, UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free + { 0, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's) }; static void mi_option_init(mi_option_desc_t* desc); From c469e3d519e2b5da8876d22d644240f67b47933f Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 22 Apr 2024 10:32:53 -0700 Subject: [PATCH 2/3] add release notes --- readme.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/readme.md b/readme.md index 4dea1086..00e1c389 100644 --- a/readme.md +++ 
b/readme.md @@ -80,6 +80,13 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. +* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Improved performance on aligned allocation. + Free-ing code is refactored into a separate module (`free.c`). New approach to collection of abandoned segments: When + a thread terminates the segments it owns are abandoned (containing still live objects) and these can be + reclaimed by other threads. We no longer use a list of abandoned segments but this is now done using bitmaps in arena's + which is more concurrent (and more aggressive). Abandoned memory can now also be reclaimed if a thread frees an object in + an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). + * 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking. @@ -298,8 +305,9 @@ Further options for large workloads and services: at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed). -- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly - improve performance. 
Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs +- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2 or 4MiB) when available; for some workloads this can significantly + improve performance. When this option is disabled it also disables transparent huge pages (THP) for the process + (on Linux and Android). Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). From 96819a3f1d8262b900dbbce5e4d2b631a3ede7e6 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 22 Apr 2024 11:00:42 -0700 Subject: [PATCH 3/3] update readme --- readme.md | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/readme.md b/readme.md index 00e1c389..b92c7d33 100644 --- a/readme.md +++ b/readme.md @@ -80,12 +80,15 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. -* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Improved performance on aligned allocation. - Free-ing code is refactored into a separate module (`free.c`). New approach to collection of abandoned segments: When +* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds. + Free-ing code is refactored into a separate module (`free.c`). 
Mimalloc page info is simplified with the block size + directly available (and new `block_size_shift` to improve aligned block free-ing). + New approach to collection of abandoned segments: When a thread terminates the segments it owns are abandoned (containing still live objects) and these can be reclaimed by other threads. We no longer use a list of abandoned segments but this is now done using bitmaps in arena's which is more concurrent (and more aggressive). Abandoned memory can now also be reclaimed if a thread frees an object in - an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). + an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). The option `mi_option_max_segment_reclaim` + gives a maximum percentage of abandoned segments that can be reclaimed per try (=10%). * 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory @@ -151,7 +154,7 @@ mimalloc is used in various large scale low-latency services and programs, for e ## Windows -Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build. +Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build. The `mimalloc` project builds a static library (in `out/msvc-x64`), while the `mimalloc-override` project builds a DLL for overriding malloc in the entire program. @@ -287,17 +290,23 @@ You can set further options either programmatically (using [`mi_option_set`](htt Advanced options: +- `MIMALLOC_ARENA_EAGER_COMMIT=2`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc + allocates segments and pages. Set this to 2 (default) to + only enable this on overcommit systems (e.g. Linux). Set this to 1 to enable explicitly on other systems + as well (like Windows or macOS) which may improve performance (as the whole arena is committed at once). 
+ Note that eager commit only increases the commit but not the actual peak resident set + (rss) so it is generally ok to enable this. - `MIMALLOC_PURGE_DELAY=N`: the delay in `N` milli-seconds (by default `10`) after which mimalloc will purge OS pages that are not in use. This signals to the OS that the underlying physical memory can be reused which can reduce memory fragmentation especially in long running (server) programs. Setting `N` to `0` purges immediately when a page becomes unused which can improve memory usage but also decreases performance. Setting `N` to a higher value like `100` can improve performance (sometimes by a lot) at the cost of potentially using more memory at times. - Setting it to `-1` disables purging completely. -- `MIMALLOC_ARENA_EAGER_COMMIT=1`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc - allocates segments and pages. This is by default - only enabled on overcommit systems (e.g. Linux) but enabling it explicitly on other systems (like Windows or macOS) - may improve performance. Note that eager commit only increases the commit but not the actual the peak resident set - (rss) so it is generally ok to enable this. + Setting it to `-1` disables purging completely. +- `MIMALLOC_PURGE_DECOMMITS=1`: By default "purging" memory means unused memory is decommitted (`MEM_DECOMMIT` on Windows, + `MADV_DONTNEED` (which decreases rss immediately) on `mmap` systems). Set this to 0 to instead "reset" unused + memory on a purge (`MEM_RESET` on Windows, generally `MADV_FREE` (which does not decrease rss immediately) on `mmap` systems). + Mimalloc generally does not "free" OS memory but only "purges" OS memory, in other words, it tries to keep virtual + address ranges and decommits within those ranges (to make the underlying physical memory available to other processes).
Further options for large workloads and services: @@ -306,9 +315,9 @@ Further options for large workloads and services: the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed). - `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2 or 4MiB) when available; for some workloads this can significantly - improve performance. When this option is disabled it also disables transparent huge pages (THP) for the process + improve performance. When this option is disabled, it also disables transparent huge pages (THP) for the process (on Linux and Android). Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs - to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes + to explicitly give permission for large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). - `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where `N` is the number of 1GiB _huge_ OS pages. This reserves the huge pages at @@ -317,11 +326,12 @@ Further options for large workloads and services: OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at startup only once). - Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). + Note that we usually need to explicitly give permission for huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]).
With huge OS pages, it may be beneficial to set the setting `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) of a thread to not allocate in the huge OS pages; this prevents threads that are short lived - and allocate just a little to take up space in the huge OS page area (which cannot be purged). + and allocate just a little to take up space in the huge OS page area (which cannot be purged as huge OS pages are pinned + to physical memory). The huge pages are usually allocated evenly among NUMA nodes. We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all the huge pages at a specific numa node instead.