diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b1b996b3..83d6a482 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,10 +6,8 @@ trigger: branches: include: - - master - - dev3 - - dev2 - - dev + - main + - dev* tags: include: - v* @@ -185,35 +183,6 @@ jobs: # Other OS versions (just debug mode) # ---------------------------------------------------------- -- job: - displayName: Windows 2019 - pool: - vmImage: - windows-2019 - strategy: - matrix: - Debug: - BuildType: debug - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON - MSBuildConfiguration: Debug - Release: - BuildType: release - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release - MSBuildConfiguration: Release - steps: - - task: CMake@1 - inputs: - workingDirectory: $(BuildType) - cmakeArgs: .. $(cmakeExtraArgs) - - task: MSBuild@1 - inputs: - solution: $(BuildType)/libmimalloc.sln - configuration: '$(MSBuildConfiguration)' - msbuildArguments: -m - - script: ctest --verbose --timeout 240 -C $(MSBuildConfiguration) - workingDirectory: $(BuildType) - displayName: CTest - - job: displayName: Ubuntu 24.04 pool: diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 7af1e6e1..0b515281 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -8,7 +8,6 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_INTERNAL_H #define MIMALLOC_INTERNAL_H - // -------------------------------------------------------------------------- // This file contains the internal API's of mimalloc and various utility // functions and macros. @@ -17,50 +16,88 @@ terms of the MIT license. A copy of the license can be found in the file #include "types.h" #include "track.h" + +// -------------------------------------------------------------------------- +// Compiler defines +// -------------------------------------------------------------------------- + #if (MI_DEBUG>0) #define mi_trace_message(...) 
_mi_trace_message(__VA_ARGS__) #else #define mi_trace_message(...) #endif -#define MI_CACHE_LINE 64 +#define mi_decl_cache_align mi_decl_align(64) + #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) #pragma warning(disable:26812) // unscoped enum warning #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) -#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) +#define mi_decl_align(a) __declspec(align(a)) +#define mi_decl_noreturn __declspec(noreturn) #define mi_decl_weak #define mi_decl_hidden +#define mi_decl_cold #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread -#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#define mi_decl_align(a) __attribute__((aligned(a))) +#define mi_decl_noreturn __attribute__((noreturn)) #define mi_decl_weak __attribute__((weak)) #define mi_decl_hidden __attribute__((visibility("hidden"))) +#if (__GNUC__ >= 4) || defined(__clang__) +#define mi_decl_cold __attribute__((cold)) +#else +#define mi_decl_cold +#endif #elif __cplusplus >= 201103L // c++11 #define mi_decl_noinline #define mi_decl_thread thread_local -#define mi_decl_cache_align alignas(MI_CACHE_LINE) +#define mi_decl_align(a) alignas(a) +#define mi_decl_noreturn [[noreturn]] #define mi_decl_weak #define mi_decl_hidden +#define mi_decl_cold #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) -#define mi_decl_cache_align +#define mi_decl_align(a) +#define mi_decl_noreturn #define mi_decl_weak #define mi_decl_hidden +#define mi_decl_cold +#endif + +#if defined(__GNUC__) || defined(__clang__) +#define mi_unlikely(x) (__builtin_expect(!!(x),false)) +#define mi_likely(x) (__builtin_expect(!!(x),true)) +#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG 
>= 202002L) +#define mi_unlikely(x) (x) [[unlikely]] +#define mi_likely(x) (x) [[likely]] +#else +#define mi_unlikely(x) (x) +#define mi_likely(x) (x) +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#if defined(__cplusplus) +#define mi_decl_externc extern "C" +#else +#define mi_decl_externc #endif #if defined(__EMSCRIPTEN__) && !defined(__wasi__) #define __wasi__ #endif -#if defined(__cplusplus) -#define mi_decl_externc extern "C" -#else -#define mi_decl_externc -#endif + +// -------------------------------------------------------------------------- +// Internal functions +// -------------------------------------------------------------------------- // "libc.c" #include @@ -126,13 +163,14 @@ bool _mi_os_has_overcommit(void); bool _mi_os_has_virtual_reserve(void); bool _mi_os_reset(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero); -bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); bool _mi_os_decommit(void* addr, size_t size); -bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size); +void _mi_os_reuse(void* p, size_t size); +mi_decl_nodiscard bool _mi_os_commit(void* p, size_t size, bool* is_zero); +mi_decl_nodiscard bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); +bool _mi_os_protect(void* addr, size_t size); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); @@ -258,26 +296,6 @@ bool _mi_page_is_valid(mi_page_t* page); #endif -// ------------------------------------------------------ -// Branches -// ------------------------------------------------------ - -#if defined(__GNUC__) || 
defined(__clang__) -#define mi_unlikely(x) (__builtin_expect(!!(x),false)) -#define mi_likely(x) (__builtin_expect(!!(x),true)) -#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) -#define mi_unlikely(x) (x) [[unlikely]] -#define mi_likely(x) (x) [[likely]] -#else -#define mi_unlikely(x) (x) -#define mi_likely(x) (x) -#endif - -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif - - /* ----------------------------------------------------------- Error codes passed to `_mi_fatal_error` All are recoverable but EFAULT is a serious error and aborts by default in secure mode. @@ -302,6 +320,32 @@ bool _mi_page_is_valid(mi_page_t* page); #endif +// ------------------------------------------------------ +// Assertions +// ------------------------------------------------------ + +#if (MI_DEBUG) +// use our own assertion to print without memory allocation +mi_decl_noreturn mi_decl_cold void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func) mi_attr_noexcept; +#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) +#else +#define mi_assert(x) +#endif + +#if (MI_DEBUG>1) +#define mi_assert_internal mi_assert +#else +#define mi_assert_internal(x) +#endif + +#if (MI_DEBUG>2) +#define mi_assert_expensive mi_assert +#else +#define mi_assert_expensive(x) +#endif + + + /* ----------------------------------------------------------- Inlined definitions ----------------------------------------------------------- */ diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 3d8f1806..c71678cc 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -63,6 +63,11 @@ int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); // Returns error code or 0 on success. int _mi_prim_reset(void* addr, size_t size); +// Reuse memory. 
This is called for memory that is already committed but +// may have been reset (`_mi_prim_reset`) or decommitted (`_mi_prim_decommit`) where `needs_recommit` was false. +// Returns error code or 0 on success. On most platforms this is a no-op. +int _mi_prim_reuse(void* addr, size_t size); + // Protect memory. Returns error code or 0 on success. int _mi_prim_protect(void* addr, size_t size, bool protect); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index ce8b64d8..a15d9cba 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -632,7 +632,6 @@ struct mi_tld_s { }; - // ------------------------------------------------------ // Debug // ------------------------------------------------------ @@ -647,26 +646,6 @@ struct mi_tld_s { #define MI_DEBUG_PADDING (0xDE) #endif -#if (MI_DEBUG) -// use our own assertion to print without memory allocation -void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func ); -#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) -#else -#define mi_assert(x) -#endif - -#if (MI_DEBUG>1) -#define mi_assert_internal mi_assert -#else -#define mi_assert_internal(x) -#endif - -#if (MI_DEBUG>2) -#define mi_assert_expensive mi_assert -#else -#define mi_assert_expensive(x) -#endif - // ------------------------------------------------------ // Statistics diff --git a/readme.md b/readme.md index cee78898..601a7e24 100644 --- a/readme.md +++ b/readme.md @@ -72,15 +72,14 @@ Enjoy! ### Branches -* `master`: latest stable release (still based on `dev2`). +* `main`: latest stable release (still based on `dev2`). * `dev`: development branch for mimalloc v1. Use this branch for submitting PR's. * `dev2`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage mimalloc pages that can reduce fragmentation. 
* `dev3`: development branch for mimalloc v3-beta. This branch is downstream of `dev`. This version simplifies the lock-free ownership of previous versions, has no thread-local segments any more. - This improves sharing of memory between threads, and on certain large workloads may use less memory - with less fragmentation. + This improves sharing of memory between threads, and on certain large workloads may use (much) less memory. ### Releases diff --git a/src/arena.c b/src/arena.c index a14d78b9..98b6b408 100644 --- a/src/arena.c +++ b/src/arena.c @@ -270,12 +270,12 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar else if (commit) { // commit requested, but the range may not be committed as a whole: ensure it is committed now memid->initially_committed = true; + const size_t commit_size = mi_arena_block_size(needed_bcount); bool any_uncommitted; size_t already_committed = 0; _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted, &already_committed); if (any_uncommitted) { mi_assert_internal(already_committed < needed_bcount); - const size_t commit_size = mi_arena_block_size(needed_bcount); const size_t stat_commit_size = commit_size - mi_arena_block_size(already_committed); bool commit_zero = false; if (!_mi_os_commit_ex(p, commit_size, &commit_zero, stat_commit_size)) { @@ -285,6 +285,10 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar if (commit_zero) { memid->initially_zero = true; } } } + else { + // all are already committed: signal that we are reusing memory in case it was purged before + _mi_os_reuse( p, commit_size ); + } } else { // no need to commit, but check if already fully committed diff --git a/src/options.c b/src/options.c index 4759e0b0..c2331ccb 100644 --- a/src/options.c +++ b/src/options.c @@ -525,7 +525,7 @@ void _mi_warning_message(const char* fmt, ...) 
{ #if MI_DEBUG -void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { +mi_decl_noreturn mi_decl_cold void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) mi_attr_noexcept { _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); abort(); } diff --git a/src/os.c b/src/os.c index ba0a6419..faca0d23 100644 --- a/src/os.c +++ b/src/os.c @@ -166,7 +166,7 @@ static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) { void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid) { if (mi_memkind_is_os(memid.memkind)) { size_t csize = memid.mem.os.size; - if (csize==0) { _mi_os_good_alloc_size(size); } + if (csize==0) { csize = _mi_os_good_alloc_size(size); } size_t commit_size = (still_committed ? csize : 0); void* base = addr; // different base? (due to alignment) @@ -285,7 +285,10 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only the aligned part if (commit) { - _mi_os_commit(p, size, NULL); + if (!_mi_os_commit(p, size, NULL)) { + mi_os_prim_free(*base, over_size, 0); + return NULL; + } } } else { // mmap can free inside an allocation @@ -494,6 +497,17 @@ bool _mi_os_reset(void* addr, size_t size) { } +void _mi_os_reuse( void* addr, size_t size ) { + // page align conservatively within the range + size_t csize = 0; + void* const start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return; + const int err = _mi_prim_reuse(start, csize); + if (err != 0) { + _mi_warning_message("cannot reuse OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + } +} + // either resets or decommits memory, returns true if the memory needs // to be recommitted if it is to be re-used later on. 
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size) diff --git a/src/page.c b/src/page.c index 2bbc2ed1..a5a10503 100644 --- a/src/page.c +++ b/src/page.c @@ -37,7 +37,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); #if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { @@ -632,15 +632,14 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // Note: we also experimented with "bump" allocation on the first // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { - MI_UNUSED(tld); +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); mi_assert(page->local_free == NULL); - if (page->free != NULL) return; + if (page->free != NULL) return true; #endif - if (page->capacity >= page->reserved) return; + if (page->capacity >= page->reserved) return true; mi_stat_counter_increase(tld->stats.pages_extended, 1); @@ -673,6 +672,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) page->capacity += (uint16_t)extend; mi_stat_increase(tld->stats.page_committed, extend * bsize); mi_assert_expensive(mi_page_is_valid_init(page)); + return true; } // Initialize a fresh page @@ -727,8 +727,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list - mi_page_extend_free(heap,page,tld); - 
mi_assert(mi_page_immediate_available(page)); + if (mi_page_extend_free(heap,page,tld)) { + mi_assert(mi_page_immediate_available(page)); + } + return; } @@ -820,9 +822,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p if (page_candidate != NULL) { page = page_candidate; } - if (page != NULL && !mi_page_immediate_available(page)) { - mi_assert_internal(mi_page_is_expandable(page)); - mi_page_extend_free(heap, page, heap->tld); + if (page != NULL) { + if (!mi_page_immediate_available(page)) { + mi_assert_internal(mi_page_is_expandable(page)); + if (!mi_page_extend_free(heap, page, heap->tld)) { + page = NULL; // failed to extend + } + } + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); } if (page == NULL) { diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c index a8677cbc..c4cfc35d 100644 --- a/src/prim/emscripten/prim.c +++ b/src/prim/emscripten/prim.c @@ -114,6 +114,11 @@ int _mi_prim_reset(void* addr, size_t size) { return 0; } +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + int _mi_prim_protect(void* addr, size_t size, bool protect) { MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); return 0; diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 99215494..c1fa9f7c 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -429,13 +429,27 @@ int _mi_prim_commit(void* start, size_t size, bool* is_zero) { return err; } +int _mi_prim_reuse(void* start, size_t size) { + #if defined(__APPLE__) && defined(MADV_FREE_REUSE) + return unix_madvise(start, size, MADV_FREE_REUSE); + #endif + MI_UNUSED(start); MI_UNUSED(size); return 0; // no-op fallback: mark both parameters as used when the madvise path above is compiled out +} + int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; - // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) - err = unix_madvise(start, size, MADV_DONTNEED); #if !MI_DEBUG && MI_SECURE<=2 *needs_recommit = false; + #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) 
+ // decommit on macOS: use MADV_FREE_REUSABLE as it does immediate rss accounting (issue #1097) + err = unix_madvise(start, size, MADV_FREE_REUSABLE); + #else + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + err = unix_madvise(start, size, MADV_DONTNEED); + #endif #else + // note: don't use MADV_FREE_REUSABLE as the range may contain protected areas + err = unix_madvise(start, size, MADV_DONTNEED); *needs_recommit = true; mprotect(start, size, PROT_NONE); #endif @@ -450,14 +464,21 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { } int _mi_prim_reset(void* start, size_t size) { - // We try to use `MADV_FREE` as that is the fastest. A drawback though is that it + int err = 0; + #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) + // on macOS we try to use MADV_FREE_REUSABLE as it seems the fastest + err = unix_madvise(start, size, MADV_FREE_REUSABLE); + if (err == 0) return 0; + // fall through + #endif + + #if defined(MADV_FREE) + // Otherwise, we try to use `MADV_FREE` as that is the fastest. A drawback though is that it // will not reduce the `rss` stats in tools like `top` even though the memory is available // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by // default `MADV_DONTNEED` is used though. 
- #if defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); - int err; while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on @@ -465,7 +486,7 @@ int _mi_prim_reset(void* start, size_t size) { err = unix_madvise(start, size, MADV_DONTNEED); } #else - int err = unix_madvise(start, size, MADV_DONTNEED); + err = unix_madvise(start, size, MADV_DONTNEED); #endif return err; } diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index e1e7de5e..745a41fd 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -149,6 +149,11 @@ int _mi_prim_reset(void* addr, size_t size) { return 0; } +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + int _mi_prim_protect(void* addr, size_t size, bool protect) { MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); return 0; diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index b82918c1..df941af9 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -352,6 +352,11 @@ int _mi_prim_reset(void* addr, size_t size) { return (p != NULL ? 0 : (int)GetLastError()); } +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + int _mi_prim_protect(void* addr, size_t size, bool protect) { DWORD oldprotect = 0; BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect);