Mirror of https://github.com/microsoft/mimalloc.git (synced 2025-05-03 22:19:32 +03:00)

Commit 56c0a8025a: "merge from dev"
9 changed files with 78 additions and 68 deletions
@@ -751,8 +751,8 @@ static inline mi_memid_t _mi_memid_none(void) {
 
 static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) {
   mi_memid_t memid = _mi_memid_create(MI_MEM_OS);
-  memid.was_committed = committed;
-  memid.was_zero = is_zero;
+  memid.initially_committed = committed;
+  memid.initially_zero = is_zero;
   memid.is_pinned = is_large;
   return memid;
 }
@@ -387,24 +387,24 @@ static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
 }
 
 typedef struct mi_memid_os_info {
-  void* base;           // actual base address of the block (used for offset aligned allocations)
-  size_t alignment;     // alignment at allocation
+  void*  base;          // actual base address of the block (used for offset aligned allocations)
+  size_t alignment;     // alignment at allocation
 } mi_memid_os_info_t;
 
 typedef struct mi_memid_arena_info {
-  size_t block_index;   // index in the arena
-  mi_arena_id_t id;     // arena id (>= 1)
-  bool is_exclusive;    // the arena can only be used for specific arena allocations
+  size_t        block_index; // index in the arena
+  mi_arena_id_t id;          // arena id (>= 1)
+  bool          is_exclusive; // the arena can only be used for specific arena allocations
 } mi_memid_arena_info_t;
 
 typedef struct mi_memid_s {
   union {
-    mi_memid_os_info_t os;      // only used for MI_MEM_OS
-    mi_memid_arena_info_t arena;// only used for MI_MEM_ARENA
+    mi_memid_os_info_t    os;    // only used for MI_MEM_OS
+    mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
   } mem;
-  bool is_pinned;           // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages)
-  bool was_committed;       // `true` if the memory was originally allocated as committed
-  bool was_zero;            // `true` if the memory was originally zero initialized
+  bool is_pinned;           // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages)
+  bool initially_committed; // `true` if the memory was originally allocated as committed
+  bool initially_zero;      // `true` if the memory was originally zero initialized
   mi_memkind_t memkind;
 } mi_memid_t;
 
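The renamed flags describe the memory's state at allocation time, not its current state; the payoff is that callers can skip redundant work, as the `src/segment.c` hunk at the end of this commit does. A self-contained sketch of that pattern with toy types (none of these names are mimalloc's):

```c
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

// Toy stand-in for mi_memid_t.
typedef struct {
  bool initially_committed;  // block started out committed
  bool initially_zero;       // block started out zero-filled
} toy_memid_t;

// Toy allocator: calloc already zeros, so report initially_zero = true.
static void* toy_os_alloc(size_t size, toy_memid_t* memid) {
  void* p = calloc(1, size);
  memid->initially_committed = (p != NULL);
  memid->initially_zero      = (p != NULL);
  return p;
}

// Consumer: zero only when the source did not already do it, mirroring
// `if (!segment->memid.initially_zero) { ... memset ... }` below.
static void* alloc_zeroed_block(size_t size) {
  toy_memid_t memid;
  void* p = toy_os_alloc(size, &memid);
  if (p != NULL && !memid.initially_zero) {
    memset(p, 0, size);  // skipped here, since calloc zeroed already
  }
  return p;
}
```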
readme.md (78 changed lines)
@@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac
 Initially developed by Daan Leijen for the runtime systems of the
 [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
 
-Latest release tag: `v2.1.1` (2023-04-03).
-Latest stable tag: `v1.8.1` (2023-04-03).
+Latest release tag: `v2.1.2` (2023-04-24).
+Latest stable tag: `v1.8.2` (2023-04-24).
 
 mimalloc is a drop-in replacement for `malloc` and can be used in other programs
 without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:

@@ -43,7 +43,7 @@ It also includes a robust way to override the default allocator in [Windows](#ov
   and the chance of contending on a single location will be low -- this is quite
   similar to randomized algorithms like skip lists where adding
   a random oracle removes the need for a more complex algorithm.
-- __eager page reset__: when a "page" becomes empty (with increased chance
+- __eager page purging__: when a "page" becomes empty (with increased chance
   due to free list sharding) the memory is marked to the OS as unused (reset or decommitted)
   reducing (real) memory pressure and fragmentation, especially in long running
   programs.

@@ -78,6 +78,10 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page
 and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance
 (see [below](#performance)); please report if you observe any significant performance regression.
 
+* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on FreeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity
+  by removing regions and segment caches, using only arenas with improved memory purging -- this may improve memory
+  usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking.
+
 * 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms.
 
 * 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support for dynamic overriding on Windows 11. Improved tracing precision

@@ -105,20 +109,6 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page
 improved wasm support, faster aligned allocation,
 various small fixes.
 
-* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including
-  M1), improved performance for v2 for large objects, Python integration improvements, more standard
-  installation directories, various small fixes.
-
-* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix
-  thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes.
-
-* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental).
-
-* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages.
-
-* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics,
-  improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes.
-
 * [Older release notes](#older-release-notes)
 
 Special thanks to:
@@ -280,43 +270,48 @@ completely and redirect all calls to the _mimalloc_ library instead .
 
 ## Environment Options
 
-You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)),
-or via environment variables:
+You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), or via environment variables:
 
 - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates.
 - `MIMALLOC_VERBOSE=1`: show verbose messages.
 - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages.
-- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages that are not in use, to signal to the OS
-  that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server)
-  programs. By setting it to `0` this will no longer be done which can improve performance for batch-like programs.
-  As an alternative, the `MIMALLOC_RESET_DELAY=`<msecs> can be set higher (100ms by default) to make the page
-  reset occur less frequently instead of turning it off completely.
+
+Advanced options:
+
+- `MIMALLOC_PURGE_DELAY=N`: the delay in `N` milli-seconds (by default `10`) after which mimalloc will purge
+  OS pages that are not in use. This signals to the OS that the underlying physical memory can be reused which
+  can reduce memory fragmentation especially in long running (server) programs. Setting `N` to `0` purges immediately when
+  a page becomes unused which can improve memory usage but also decreases performance. Setting `N` to a higher
+  value like `100` can improve performance (sometimes by a lot) at the cost of potentially using more memory at times.
+  Setting it to `-1` disables purging completely.
+- `MIMALLOC_ARENA_EAGER_COMMIT=1`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc
+  allocates segments and pages. This is by default
+  only enabled on overcommit systems (e.g. Linux) but enabling it explicitly on other systems (like Windows or macOS)
+  may improve performance. Note that eager commit only increases the commit but not the actual peak resident set
+  (rss) so it is generally ok to enable this.
 
 Further options for large workloads and services:
 
 - `MIMALLOC_USE_NUMA_NODES=N`: pretend there are at most `N` NUMA nodes. If not set, the actual NUMA nodes are detected
   at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than
   the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA
   nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed).
-- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly
+- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly
   improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
   to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
   the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that
   can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible).
-<!--
-- `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions
-  show in the working set even though usually just a small part is committed to physical memory. This is why it is
-  turned off by default on Windows as it looks not good in the task manager. However, turning it on has no
-  real drawbacks and may improve performance by a little.
--->
-- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB _huge_ OS pages. This reserves the huge pages at
+- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where `N` is the number of 1GiB _huge_ OS pages. This reserves the huge pages at
   startup and sometimes this can give a large (latency) performance improvement on big workloads.
-  Usually it is better to not use
-  `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving
+  Usually it is better to not use `MIMALLOC_ALLOW_LARGE_OS_PAGES=1` in combination with this setting. Just like large
+  OS pages, use with care as reserving
   contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at
   startup only once).
   Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]).
   With huge OS pages, it may be beneficial to set the setting
   `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
   of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
-  and allocate just a little to take up space in the huge OS page area (which cannot be reset).
+  and allocate just a little to take up space in the huge OS page area (which cannot be purged).
   The huge pages are usually allocated evenly among NUMA nodes.
   We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all
   the huge pages at a specific numa node instead.
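These environment variables correspond to `mi_option_t` values that can also be set programmatically before the first allocation. A minimal sketch, assuming the enum names follow mimalloc's usual convention of mirroring the variable names (e.g. `mi_option_purge_delay` for `MIMALLOC_PURGE_DELAY`):

```c
#include <mimalloc.h>

int main(void) {
  // Equivalent to MIMALLOC_PURGE_DELAY=100: purge unused OS pages less
  // often, trading some peak memory for throughput.
  mi_option_set(mi_option_purge_delay, 100);

  // Equivalent to MIMALLOC_SHOW_STATS=1: print statistics at exit.
  mi_option_enable(mi_option_show_stats);

  void* p = mi_malloc(1024);
  mi_free(p);
  return 0;
}
```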
@@ -794,6 +789,16 @@ provided by the bot. You will only need to do this once across all repos using o
 
 # Older Release Notes
 
+* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including
+  M1), improved performance for v2 for large objects, Python integration improvements, more standard
+  installation directories, various small fixes.
+* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix
+  thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes.
+* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental).
+* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages.
+* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics,
+  improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes.
+
 * 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved
   handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call.
 * 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations,

@@ -815,6 +820,7 @@ provided by the bot. You will only need to do this once across all repos using o
 more eager concurrent free, addition of STL allocator, fixed potential memory leak.
 * 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger
   free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode.
+
 * 2019-12-22, `v1.2.2`: stable release 1.2: minor updates.
 * 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows.
 * 2019-10-07, `v1.1.0`: stable release 1.1.
@@ -245,11 +245,13 @@ extern "C" {
   int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); }
 
   // `aligned_alloc` is only available when __USE_ISOC11 is defined.
+  // Note: it seems __USE_ISOC11 is not defined in musl (and perhaps other libc's) so we only check
+  // for it if using glibc.
   // Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot
   // override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9.
   // Fortunately, in the case where `aligned_alloc` is declared as `static inline` it
   // uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves.
-  #if __USE_ISOC11
+  #if !defined(__GLIBC__) || __USE_ISOC11
   void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
   #endif
 #endif
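With the relaxed guard, `aligned_alloc` is now overridden on non-glibc C libraries (such as musl) even when `__USE_ISOC11` is not defined. A small sketch to verify the override took effect (assumes the program links against, or `LD_PRELOAD`s, the mimalloc override library; `mi_is_in_heap_region` is mimalloc's public check for whether a pointer came from its heap):

```c
#include <stdlib.h>
#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  void* p = aligned_alloc(64, 256);
  // If the override is active, the pointer lives in mimalloc's heap.
  printf("aligned_alloc overridden: %d\n",
         p != NULL && mi_is_in_heap_region(p));
  free(p);
  return 0;
}
```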
src/arena.c (22 changed lines)
@@ -237,33 +237,33 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
   }
 
   // set the dirty bits (todo: no need for an atomic op here?)
-  if (arena->memid.was_zero && arena->blocks_dirty != NULL) {
-    memid->was_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
+  if (arena->memid.initially_zero && arena->blocks_dirty != NULL) {
+    memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
   }
 
   // set commit state
   if (arena->blocks_committed == NULL) {
     // always committed
-    memid->was_committed = true;
+    memid->initially_committed = true;
   }
   else if (commit) {
     // commit requested, but the range may not be committed as a whole: ensure it is committed now
-    memid->was_committed = true;
+    memid->initially_committed = true;
     bool any_uncommitted;
     _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
     if (any_uncommitted) {
       bool commit_zero = false;
       if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats)) {
-        memid->was_committed = false;
+        memid->initially_committed = false;
       }
       else {
-        if (commit_zero) { memid->was_zero = true; }
+        if (commit_zero) { memid->initially_zero = true; }
       }
     }
   }
   else {
     // no need to commit, but check if already fully committed
-    memid->was_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
+    memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
   }
 
   return p;
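The commit-state logic above hinges on the bitmap claim operation, which sets a range of bits and reports whether any bit was previously clear. A toy, non-atomic model of the committed-blocks bitmap (illustrative only; the real `_mi_bitmap_claim_across` works over atomic bitmap fields):

```c
#include <stdbool.h>
#include <stddef.h>

// Toy model: bits[] tracks per-block committed state (true = committed).
// Sets the whole range and reports via *any_unset whether any block was
// not yet committed, i.e. whether an OS commit is still needed.
static void toy_claim_across(bool* bits, size_t idx, size_t count, bool* any_unset) {
  *any_unset = false;
  for (size_t i = idx; i < idx + count; i++) {
    if (!bits[i]) { *any_unset = true; bits[i] = true; }
  }
}
```

With that model, the three branches read: no committed bitmap means the arena is always fully committed; commit requested means claim the range and call `_mi_os_commit` only when `any_unset`; otherwise only test whether the range is already fully committed (`_mi_bitmap_is_claimed_across`), without claiming.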
@@ -752,7 +752,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   if (size < MI_ARENA_BLOCK_SIZE) return false;
 
   if (is_large) {
-    mi_assert_internal(memid.was_committed && memid.is_pinned);
+    mi_assert_internal(memid.initially_committed && memid.is_pinned);
   }
 
   const size_t bcount = size / MI_ARENA_BLOCK_SIZE;

@@ -781,7 +781,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
   arena->blocks_purge     = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap
   // initialize committed bitmap?
-  if (arena->blocks_committed != NULL && arena->memid.was_committed) {
+  if (arena->blocks_committed != NULL && arena->memid.initially_committed) {
     memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning
   }
 
@@ -799,8 +799,8 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
 
 bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept {
   mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL);
-  memid.was_committed = is_committed;
-  memid.was_zero = is_zero;
+  memid.initially_committed = is_committed;
+  memid.initially_zero = is_zero;
   memid.is_pinned = is_large;
   return mi_manage_os_memory_ex2(start,size,is_large,numa_node,exclusive,memid, arena_id);
 }
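`mi_manage_os_memory_ex` is the public entry point for handing mimalloc an externally allocated region to manage as an arena, and the hunk shows exactly which memid flags it sets. A usage sketch (POSIX; note the earlier hunk requires at least `MI_ARENA_BLOCK_SIZE` bytes, so a generously sized region is used):

```c
#include <sys/mman.h>
#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  const size_t size = 64 * 1024 * 1024;  // arenas want large regions
  void* start = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (start == MAP_FAILED) return 1;

  mi_arena_id_t arena_id;
  // committed=true and zero=true match what fresh anonymous mmap gives
  // us; is_large=false, numa_node=-1 (unknown), exclusive=false.
  if (!mi_manage_os_memory_ex(start, size, /*is_committed*/ true,
                              /*is_large*/ false, /*is_zero*/ true,
                              /*numa_node*/ -1, /*exclusive*/ false,
                              &arena_id)) {
    fprintf(stderr, "arena registration failed\n");
    return 1;
  }
  void* p = mi_malloc(1024);  // may now be served from the new arena
  mi_free(p);
  return 0;
}
```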
@@ -244,7 +244,7 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) {
     }
     if (td != NULL) {
       td->memid = memid;
-      is_zero = memid.was_zero;
+      is_zero = memid.initially_zero;
     }
   }
 
@@ -387,9 +387,11 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) {
 }
 
 int _mi_prim_reset(void* start, size_t size) {
-  // We always use MADV_DONTNEED if possible even if it may be a bit more expensive as MADV_FREE
-  // as this guarantees that we see the actual rss reflected in tools like `top`.
-  #if 0 && defined(MADV_FREE)
+  // We try to use `MADV_FREE` as that is the fastest. A drawback though is that it
+  // will not reduce the `rss` stats in tools like `top` even though the memory is available
+  // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by
+  // default `MADV_DONTNEED` is used though.
+  #if defined(MADV_FREE)
   static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE);
   int oadvice = (int)mi_atomic_load_relaxed(&advice);
   int err;
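The commit re-enables `MADV_FREE` as the default reset advice, relying on `MIMALLOC_PURGE_DECOMMITS=1` to keep `rss` accurate by default. The atomic `advice` variable exists so that a kernel rejecting `MADV_FREE` (e.g. Linux before 4.5) permanently downgrades future calls to `MADV_DONTNEED`. A standalone sketch of that fallback pattern (illustrative, not the exact mimalloc code):

```c
#include <sys/mman.h>
#include <errno.h>
#include <stdatomic.h>

// First MADV_FREE failure with EINVAL permanently downgrades to
// MADV_DONTNEED, mirroring the pattern in _mi_prim_reset above.
static int os_reset(void* start, size_t size) {
#if defined(MADV_FREE)
  static _Atomic int advice = MADV_FREE;
  int oadvice = atomic_load_explicit(&advice, memory_order_relaxed);
  int err;
  while ((err = madvise(start, size, oadvice)) != 0 && errno == EAGAIN) {
    errno = 0;  // transient failure: retry
  }
  if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) {
    // MADV_FREE unsupported: fall back to MADV_DONTNEED from now on.
    atomic_store_explicit(&advice, MADV_DONTNEED, memory_order_relaxed);
    err = madvise(start, size, MADV_DONTNEED);
  }
  return err;
#else
  return madvise(start, size, MADV_DONTNEED);
#endif
}
```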
@@ -285,9 +285,9 @@ int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
 int _mi_prim_reset(void* addr, size_t size) {
   void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
   mi_assert_internal(p == addr);
-  #if 1
+  #if 0
   if (p != NULL) {
-    VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory from the working set
+    VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory directly from the working set
   }
   #endif
   return (p != NULL ? 0 : (int)GetLastError());
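The `#if 1` to `#if 0` change disables the `VirtualUnlock` call, so `MEM_RESET` alone now only marks the pages as discardable without immediately evicting them from the working set. A standalone sketch of the reset pattern used here (Windows-only, illustrative):

```c
#include <windows.h>
#include <string.h>
#include <stdio.h>

int main(void) {
  const size_t size = 1 << 20;
  void* addr = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
  if (addr == NULL) return 1;
  memset(addr, 0xAB, size);

  // MEM_RESET: the contents may be discarded by the OS under pressure,
  // but the region stays committed and can be reused without a new
  // commit (contents are then undefined).
  void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
  printf("reset %s\n", p == addr ? "ok" : "failed");

  VirtualFree(addr, 0, MEM_RELEASE);
  return 0;
}
```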
@@ -824,7 +824,7 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment
 
   // ensure metadata part of the segment is committed
   mi_commit_mask_t commit_mask;
-  if (memid.was_committed) {
+  if (memid.initially_committed) {
     mi_commit_mask_create_full(&commit_mask);
   }
   else {

@@ -878,7 +878,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi
   if (segment == NULL) return NULL;
 
   // zero the segment info? -- not always needed as it may be zero initialized from the OS
-  if (!segment->memid.was_zero) {
+  if (!segment->memid.initially_zero) {
     ptrdiff_t ofs    = offsetof(mi_segment_t, next);
     size_t    prefix = offsetof(mi_segment_t, slices) - ofs;
     size_t    zsize  = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more