This commit is contained in:
Daan 2021-12-12 10:35:19 -08:00
commit c858690dea
9 changed files with 48 additions and 31 deletions

View file

@ -181,10 +181,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden) list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden)
if(NOT MI_USE_CXX) if(NOT MI_USE_CXX)
list(APPEND mi_cflags -Wstrict-prototypes) list(APPEND mi_cflags -Wstrict-prototypes)
endif() endif()
if(CMAKE_C_COMPILER_ID MATCHES "GNU")
list(APPEND mi_cflags -Wno-invalid-memory-model)
endif()
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang") if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang")
list(APPEND mi_cflags -Wpedantic -Wno-static-in-inline) list(APPEND mi_cflags -Wpedantic -Wno-static-in-inline)
endif() endif()

View file

@ -782,6 +782,7 @@ typedef enum mi_option_e {
mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows) mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
mi_option_large_os_pages, ///< Use large OS pages (2MiB in size) if possible mi_option_large_os_pages, ///< Use large OS pages (2MiB in size) if possible
mi_option_reserve_huge_os_pages, ///< The number of huge OS pages (1GiB in size) to reserve at the start of the program. mi_option_reserve_huge_os_pages, ///< The number of huge OS pages (1GiB in size) to reserve at the start of the program.
mi_option_reserve_huge_os_pages_at, ///< Reserve huge OS pages at node N.
mi_option_segment_cache, ///< The number of segments per thread to keep cached. mi_option_segment_cache, ///< The number of segments per thread to keep cached.
mi_option_page_reset, ///< Reset page memory after \a mi_option_reset_delay milliseconds when it becomes free. mi_option_page_reset, ///< Reset page memory after \a mi_option_reset_delay milliseconds when it becomes free.
mi_option_segment_reset, ///< Experimental mi_option_segment_reset, ///< Experimental
@ -1053,6 +1054,8 @@ or via environment variables.
`MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
of a thread to not allocate in the huge OS pages; this prevents threads that are short lived of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
and allocate just a little to take up space in the huge OS page area (which cannot be reset). and allocate just a little to take up space in the huge OS page area (which cannot be reset).
- `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N`: where N is the numa node. This reserves the huge pages at a specific numa node.
(`N` is -1 by default to reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected))
Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write
for all pages in the original process including the huge OS pages. When any memory is now written in that area, the for all pages in the original process including the huge OS pages. When any memory is now written in that area, the

View file

@ -271,7 +271,6 @@ mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size
mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept;
// deprecated // deprecated
mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
@ -310,6 +309,7 @@ typedef enum mi_option_e {
mi_option_reset_decommits, mi_option_reset_decommits,
mi_option_large_os_pages, // implies eager commit mi_option_large_os_pages, // implies eager commit
mi_option_reserve_huge_os_pages, mi_option_reserve_huge_os_pages,
mi_option_reserve_huge_os_pages_at,
mi_option_reserve_os_memory, mi_option_reserve_os_memory,
mi_option_segment_cache, mi_option_segment_cache,
mi_option_page_reset, mi_option_page_reset,

View file

@ -12,13 +12,13 @@ is a general purpose allocator with excellent [performance](#performance) charac
Initially developed by Daan Leijen for the run-time systems of the Initially developed by Daan Leijen for the run-time systems of the
[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
Latest release tag: `v2.0.2` (beta, 2021-06-17). Latest release tag: `v2.0.3` (beta, 2021-11-14).
Latest stable tag: `v1.7.2` (2021-06-17). Latest stable tag: `v1.7.3` (2021-11-14).
mimalloc is a drop-in replacement for `malloc` and can be used in other programs mimalloc is a drop-in replacement for `malloc` and can be used in other programs
without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
``` ```
> LD_PRELOAD=/usr/bin/libmimalloc.so myprogram > LD_PRELOAD=/usr/lib/libmimalloc.so myprogram
``` ```
It also has an easy way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include: It also has an easy way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include:
@ -77,6 +77,10 @@ Note: the `v2.x` beta has a new algorithm for managing internal mimalloc pages t
and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance
(see [below](#performance)); please report if you observe any significant performance regression. (see [below](#performance)); please report if you observe any significant performance regression.
* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including
M1), improved performance for v2 for large objects, Python integration improvements, more standard
installation directories, various small fixes.
* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix * 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix
thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes.
@ -142,7 +146,7 @@ mimalloc is used in various large scale low-latency services and programs, for e
## Windows ## Windows
Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build (or `ide/vs2017/mimalloc.sln`). Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build.
The `mimalloc` project builds a static library (in `out/msvc-x64`), while the The `mimalloc` project builds a static library (in `out/msvc-x64`), while the
`mimalloc-override` project builds a DLL for overriding malloc `mimalloc-override` project builds a DLL for overriding malloc
in the entire program. in the entire program.
@ -191,6 +195,11 @@ Notes:
2. Install CCMake: `sudo apt-get install cmake-curses-gui` 2. Install CCMake: `sudo apt-get install cmake-curses-gui`
## Single source
You can also directly build the single `src/static.c` file as part of your project without
needing `cmake` at all. Make sure to also add the mimalloc `include` directory to the include path.
# Using the library # Using the library
@ -302,6 +311,9 @@ or via environment variables:
`MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
of a thread to not allocate in the huge OS pages; this prevents threads that are short lived of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
and allocate just a little to take up space in the huge OS page area (which cannot be reset). and allocate just a little to take up space in the huge OS page area (which cannot be reset).
The huge pages are usually allocated evenly among NUMA nodes.
We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all
the huge pages at a specific numa node instead.
Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write
for all pages in the original process including the huge OS pages. When any memory is now written in that area, the for all pages in the original process including the huge OS pages. When any memory is now written in that area, the
@ -337,9 +349,9 @@ When _mimalloc_ is built using debug mode, various checks are done at runtime to
- Corrupted free-lists and some forms of use-after-free are detected. - Corrupted free-lists and some forms of use-after-free are detected.
# Overriding Malloc # Overriding Standard Malloc
Overriding the standard `malloc` can be done either _dynamically_ or _statically_. Overriding the standard `malloc` (and `new`) can be done either _dynamically_ or _statically_.
## Dynamic override ## Dynamic override
@ -370,13 +382,12 @@ On macOS we can also preload the mimalloc shared
library so all calls to the standard `malloc` interface are library so all calls to the standard `malloc` interface are
resolved to the _mimalloc_ library. resolved to the _mimalloc_ library.
``` ```
> env DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram > env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram
``` ```
Note that certain security restrictions may apply when doing this from Note that certain security restrictions may apply when doing this from
the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash).
(Note: macOS support for dynamic overriding is recent, please report any issues.)
### Override on Windows ### Override on Windows
@ -386,7 +397,7 @@ the (dynamic) C runtime allocator, including those from other DLL's or libraries
The overriding on Windows requires that you link your program explicitly with The overriding on Windows requires that you link your program explicitly with
the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be available Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be put
in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency).
The redirection DLL ensures that all calls to the C runtime malloc API get redirected to The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
mimalloc (in `mimalloc-override.dll`). mimalloc (in `mimalloc-override.dll`).

View file

@ -475,14 +475,13 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms
return segment; return segment;
} }
// Free a block
// Free a block
void mi_free(void* p) mi_attr_noexcept void mi_free(void* p) mi_attr_noexcept
{ {
const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
if (mi_unlikely(segment == NULL)) return; if (mi_unlikely(segment == NULL)) return;
const mi_threadid_t tid = _mi_thread_id(); mi_threadid_t tid = _mi_thread_id();
mi_page_t* const page = _mi_segment_page_of(segment, p); mi_page_t* const page = _mi_segment_page_of(segment, p);
mi_block_t* const block = (mi_block_t*)p; mi_block_t* const block = (mi_block_t*)p;

View file

@ -504,7 +504,12 @@ void mi_process_init(void) mi_attr_noexcept {
if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); long reserve_at = mi_option_get(mi_option_reserve_huge_os_pages_at);
if (reserve_at != -1) {
mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500);
} else {
mi_reserve_huge_os_pages_interleave(pages, 0, pages*500);
}
} }
if (mi_option_is_enabled(mi_option_reserve_os_memory)) { if (mi_option_is_enabled(mi_option_reserve_os_memory)) {
long ksize = mi_option_get(mi_option_reserve_os_memory); long ksize = mi_option_get(mi_option_reserve_os_memory);

View file

@ -76,6 +76,7 @@ static mi_option_desc_t options[_mi_option_last] =
#endif #endif
{ 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages
{ -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N
{ 0, UNINIT, MI_OPTION(reserve_os_memory) }, { 0, UNINIT, MI_OPTION(reserve_os_memory) },
{ 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
{ 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free

View file

@ -578,29 +578,29 @@ static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses // (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
// in the middle of the 2TiB - 6TiB address range (see issue #372)) // in the middle of the 2TiB - 6TiB address range (see issue #372))
#define KK_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start #define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start
#define KK_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes) #define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes)
#define KK_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages) #define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size)
{ {
if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL;
if ((size%MI_SEGMENT_SIZE) != 0) return NULL; size = _mi_align_up(size, MI_SEGMENT_SIZE);
if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(KK_HINT_AREA / 1<<30) = 1/4096. if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
#if (MI_SECURE>0) #if (MI_SECURE>0)
size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas. size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
#endif #endif
uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size); uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
if (hint == 0 || hint > KK_HINT_MAX) { // wrap or initialize if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize
uintptr_t init = KK_HINT_BASE; uintptr_t init = MI_HINT_BASE;
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % KK_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB
#endif #endif
uintptr_t expected = hint + size; uintptr_t expected = hint + size;
mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init); mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > KK_HINT_MAX but that is ok, it is a hint after all hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
} }
if (hint%try_alignment != 0) return NULL; if (hint%try_alignment != 0) return NULL;
return (void*)hint; return (void*)hint;
@ -638,11 +638,11 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo
if (commit) flags |= MEM_COMMIT; if (commit) flags |= MEM_COMMIT;
p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large);
#elif defined(MI_USE_SBRK) #elif defined(MI_USE_SBRK)
KK_UNUSED(allow_large); MI_UNUSED(allow_large);
*is_large = false; *is_large = false;
p = mi_sbrk_heap_grow(size, try_alignment); p = mi_sbrk_heap_grow(size, try_alignment);
#elif defined(__wasi__) #elif defined(__wasi__)
KK_UNUSED(allow_large); MI_UNUSED(allow_large);
*is_large = false; *is_large = false;
p = mi_wasm_heap_grow(size, try_alignment); p = mi_wasm_heap_grow(size, try_alignment);
#else #else

View file

@ -167,8 +167,9 @@ If we cannot get good randomness, we fall back to weak randomness based on a tim
#if defined(_WIN32) #if defined(_WIN32)
#if !defined(MI_USE_RTLGENRANDOM) #if defined(MI_USE_BCRYPTGENRANDOM)
// We prefer BCryptGenRandom over RtlGenRandom // We would like to use BCryptGenRandom instead of RtlGenRandom but it can lead to a deadlock
// under the VS debugger when using dynamic overriding.
#pragma comment (lib,"bcrypt.lib") #pragma comment (lib,"bcrypt.lib")
#include <bcrypt.h> #include <bcrypt.h>
static bool os_random_buf(void* buf, size_t buf_len) { static bool os_random_buf(void* buf, size_t buf_len) {