From 6b6607ff5906be4afdc768c8c0676b595ec1afe2 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 22 Apr 2024 11:16:27 -0700 Subject: [PATCH 01/18] update readme --- readme.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/readme.md b/readme.md index 5ca499d6..05268a87 100644 --- a/readme.md +++ b/readme.md @@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.1.4` (2024-04-22). -Latest stable tag: `v1.8.4` (2024-04-22). +Latest release tag: `v2.1.4` (2024-04-22). +Latest v1 tag: `v1.8.4` (2024-04-22). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -72,11 +72,13 @@ Enjoy! * `master`: latest stable release (based on `dev-slice`). * `dev`: development branch for mimalloc v1. Use this branch for submitting PR's. -* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev`. +* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for +`src/segment.c`) ### Releases -Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to use reduce memory usage +Note: the `v2.x` version has a different algorithm for managing internal mimalloc pages (as slices) that tends to use reduce +memory usage and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. 
From 77eb3a366b0355a0a90e2a0a8b6ac03757b4a1f6 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 22 Apr 2024 11:25:26 -0700 Subject: [PATCH 02/18] bump version to 1.8.5 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index e9b7d113..af85d979 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 1) set(mi_version_minor 8) -set(mi_version_patch 4) +set(mi_version_patch 5) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 9848d531..7523c8a2 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 184 // major + 2 digits minor +#define MI_MALLOC_VERSION 185 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 146f9d2333bc0aeed1593dc659b17e65b38a33e7 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 24 Apr 2024 19:48:04 -0700 Subject: [PATCH 03/18] make TLS slot default instead of __builtin_thread_pointer. Potentially fixes build errors on various platforms (see issue #883) --- include/mimalloc/prim.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index f8a40323..6063d44a 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -203,11 +203,14 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #endif -// Do we have __builtin_thread_pointer? 
(do not make this a compound test as it fails on older gcc's, see issue #851) -#if defined(__has_builtin) -#if __has_builtin(__builtin_thread_pointer) -#define MI_HAS_BUILTIN_THREAD_POINTER 1 -#endif +// Do we have __builtin_thread_pointer? (do not make this a compound test as that fails on older gcc's, see issue #851) +#ifdef __has_builtin + #if __has_builtin(__builtin_thread_pointer) + #if (!defined(__APPLE__)) && /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly ()*/ \ + (!defined(__clang_major__) || __clang_major__ >= 14) // older clang versions emit bad code; fall back to using the TLS slot () + #define MI_HAS_BUILTIN_THREAD_POINTER 1 + #endif + #endif #elif defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__) // special case aarch64 for older gcc versions (issue #851) #define MI_HAS_BUILTIN_THREAD_POINTER 1 #endif @@ -231,15 +234,6 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { return (uintptr_t)NtCurrentTeb(); } -#elif MI_HAS_BUILTIN_THREAD_POINTER && \ - (!defined(__APPLE__)) && /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly ()*/ \ - (!defined(__clang_major__) || __clang_major__ >= 14) // older clang versions emit bad code; fall back to using the TLS slot () - -static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { - // Works on most Unix based platforms - return (uintptr_t)__builtin_thread_pointer(); -} - #elif defined(MI_HAS_TLS_SLOT) static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { @@ -255,6 +249,13 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { #endif } +#elif MI_HAS_BUILTIN_THREAD_POINTER + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + // Works on most Unix based platforms + return (uintptr_t)__builtin_thread_pointer(); +} + #else // otherwise use portable C, taking the address of a thread local 
variable (this is still very fast on most platforms). From cc3c14f2ed374f908e60a3bf29c1dff84fc8cfc2 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 26 Apr 2024 23:34:10 -0700 Subject: [PATCH 04/18] use builtin_thread_pointer only on non-apple arm64 (issue #883 and #851) --- include/mimalloc/prim.h | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 6063d44a..89266817 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -203,16 +203,14 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #endif -// Do we have __builtin_thread_pointer? (do not make this a compound test as that fails on older gcc's, see issue #851) -#ifdef __has_builtin - #if __has_builtin(__builtin_thread_pointer) - #if (!defined(__APPLE__)) && /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly ()*/ \ - (!defined(__clang_major__) || __clang_major__ >= 14) // older clang versions emit bad code; fall back to using the TLS slot () - #define MI_HAS_BUILTIN_THREAD_POINTER 1 - #endif - #endif -#elif defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__) // special case aarch64 for older gcc versions (issue #851) -#define MI_HAS_BUILTIN_THREAD_POINTER 1 +// Do we have __builtin_thread_pointer? This would be the preferred way to get a unique thread id +// but unfortunately, it seems we cannot test for this reliably at this time (see issue #883) +// Nevertheless, it seems needed on older graviton platforms (see issue #851). +// For now, we only enable this for specific platforms. 
+#if defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__) /* special case aarch64 for older gcc versions (issue #851) */ \ + && !defined(__APPLE__) /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly ()*/ \ + && (!defined(__clang_major__) || __clang_major__ >= 14) /* older clang versions emit bad code; fall back to using the TLS slot () */ +#define MI_USE_BUILTIN_THREAD_POINTER 1 #endif @@ -234,6 +232,13 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { return (uintptr_t)NtCurrentTeb(); } +#elif MI_USE_BUILTIN_THREAD_POINTER + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + // Works on most Unix based platforms with recent compilers + return (uintptr_t)__builtin_thread_pointer(); +} + #elif defined(MI_HAS_TLS_SLOT) static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { @@ -249,13 +254,6 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { #endif } -#elif MI_HAS_BUILTIN_THREAD_POINTER - -static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { - // Works on most Unix based platforms - return (uintptr_t)__builtin_thread_pointer(); -} - #else // otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms). 
From 0d22807e91f3bc416cb70bcb0a39b0f05bc515eb Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 7 May 2024 13:22:13 -0700 Subject: [PATCH 05/18] Emscripten: Remove no-longer-needed minimum emmalloc alignment --- src/prim/emscripten/prim.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c index c0fa0f4a..8b011b4d 100644 --- a/src/prim/emscripten/prim.c +++ b/src/prim/emscripten/prim.c @@ -78,17 +78,10 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la // That assumes no one else uses sbrk but us (they could go up, // scribble, and then down), but we could assert on that perhaps. *is_zero = false; - // emmalloc has some limitations on alignment size. - // TODO: Why does mimalloc ask for an align of 4MB? that ends up allocating - // 8, which wastes quite a lot for us in wasm. If that is unavoidable, - // we may want to improve emmalloc to support such alignment. See also - // https://github.com/emscripten-core/emscripten/issues/20645 + // emmalloc has a minimum alignment size. 
#define MIN_EMMALLOC_ALIGN 8 - #define MAX_EMMALLOC_ALIGN (1024*1024) if (try_alignment < MIN_EMMALLOC_ALIGN) { try_alignment = MIN_EMMALLOC_ALIGN; - } else if (try_alignment > MAX_EMMALLOC_ALIGN) { - try_alignment = MAX_EMMALLOC_ALIGN; } void* p = emmalloc_memalign(try_alignment, size); *addr = p; From 1ebc28a8ff169aa484c38a795b8f29ae6a983cdd Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 10 May 2024 15:58:37 -0700 Subject: [PATCH 06/18] update comment --- include/mimalloc/prim.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 89266817..4ee6d43f 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -130,8 +130,9 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); // If you test on another platform and it works please send a PR :-) // see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. // -// Note: on most platforms this is not actually used anymore as we prefer `__builtin_thread_pointer()` nowadays. -// However, we do still use it with older clang compilers and Apple OS (as we use TLS slot for the default heap there). +// Note: we would like to prefer `__builtin_thread_pointer()` nowadays instead of using assembly, +// but unfortunately we can not detect support reliably (see issue #883) +// We also use it on Apple OS as we use a TLS slot for the default heap there. 
#if defined(__GNUC__) && ( \ (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__) || defined(__POWERPC__))) \ From e5267a31b03b5ada98b544a421c2a0fbd082bf39 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 10 May 2024 17:03:42 -0700 Subject: [PATCH 07/18] only override strdup/strndup if those are not macros (issue #885) --- src/alloc-override.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/alloc-override.c b/src/alloc-override.c index 75afc202..12837cdd 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -136,8 +136,11 @@ typedef void* mi_nothrow_t; mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize) mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p) // In principle we do not need to forward `strdup`/`strndup` but on some systems these do not use `malloc` internally (but a more primitive call) + // We only override if `strdup` is not a macro (as on some older libc's, see issue #885) + #if !defined(strdup) mi_decl_export char* strdup(const char* str) MI_FORWARD1(mi_strdup, str) - #if !defined(__APPLE__) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7) + #endif + #if !defined(strndup) && (!defined(__APPLE__) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)) mi_decl_export char* strndup(const char* str, size_t n) MI_FORWARD2(mi_strndup, str, n) #endif #endif From 6c5d6e1f721cd3eb369b93e4a1931d180a55a873 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 10 May 2024 17:24:52 -0700 Subject: [PATCH 08/18] fix max allocation size on 32-bit systems (issue #882) --- include/mimalloc/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 97438569..e2b9ce38 100644 --- a/include/mimalloc/types.h +++ 
b/include/mimalloc/types.h @@ -214,7 +214,7 @@ typedef int32_t mi_ssize_t; // we never allocate more than PTRDIFF_MAX (see also ) // on 64-bit+ systems we also limit the maximum allocation size such that the slice count fits in 32-bits. (issue #877) -#if PTRDIFF_MAX >= (MI_SEGMENT_SLIZE_SIZE * UINT32_MAX) +#if (PTRDIFF_MAX > INT32_MAX) && (PTRDIFF_MAX >= (MI_SEGMENT_SLIZE_SIZE * UINT32_MAX)) #define MI_MAX_ALLOC_SIZE (MI_SEGMENT_SLICE_SIZE * (UINT32_MAX-1)) #else #define MI_MAX_ALLOC_SIZE PTRDIFF_MAX From 605c354bd43450679d617f56fcab0262ac58be11 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 10 May 2024 17:31:00 -0700 Subject: [PATCH 09/18] increase segment map for asan builds (issue #881) --- src/segment-map.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/segment-map.c b/src/segment-map.c index a306ec67..1efb1e23 100644 --- a/src/segment-map.c +++ b/src/segment-map.c @@ -16,7 +16,9 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc/internal.h" #include "mimalloc/atomic.h" -#if (MI_INTPTR_SIZE==8) +#if (MI_INTPTR_SIZE>=8) && MI_TRACK_ASAN +#define MI_MAX_ADDRESS ((size_t)140 << 40) // 140TB (see issue #881) +#elif (MI_INTPTR_SIZE >= 8) #define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB (to include huge page areas) #else #define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb From c70c1df16a48f214f877573ddfed68dc183ef113 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 10 May 2024 20:19:17 -0700 Subject: [PATCH 10/18] better fast path for aligned allocation; check max alloc size correctly in the aligned fallback --- src/alloc-aligned.c | 33 ++++++++++++++++----------------- test/test-api.c | 17 +++++++++++++++++ 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index b63c5e43..e5a42357 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -15,17 +15,23 @@ terms of the MIT license. 
A copy of the license can be found in the file // Aligned Allocation // ------------------------------------------------------ +static inline bool mi_is_naturally_aligned( size_t size, size_t alignment ) { + // objects up to `MI_MEDIUM_OBJ_SIZE_MAX` are allocated aligned to their size (see `segment.c:_mi_segment_page_start`). + // note: the size may not be not an actual bin-size but it turns out the test below is still correct for our + // powers of two bin spacing (see test-api.c:test-aligned13). + mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0)); + return (size <= MI_MEDIUM_OBJ_SIZE_MAX && alignment <= size && ((size + MI_PADDING_SIZE) & (alignment-1)) == 0); +} + + // Fallback primitive aligned allocation -- split out for better codegen static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { - mi_assert_internal(size <= PTRDIFF_MAX); + mi_assert_internal(size <= (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)); mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); - const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)` - const size_t padsize = size + MI_PADDING_SIZE; - - // use regular allocation if it is guaranteed to fit the alignment constraints - if (offset == 0 && alignment <= padsize && padsize <= MI_MEDIUM_OBJ_SIZE_MAX && (padsize & align_mask) == 0) { + // use regular allocation if it is guaranteed to fit the alignment constraints. + if (offset == 0 && mi_is_naturally_aligned(size,alignment)) { void* p = _mi_heap_malloc_zero(heap, size, zero); mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); return p; @@ -57,6 +63,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* } // .. 
and align within the allocation + const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)` const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask; const uintptr_t adjust = (poffset == 0 ? 0 : alignment - poffset); mi_assert_internal(adjust < alignment); @@ -100,14 +107,14 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t return NULL; } - if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) + if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) { // we don't allocate more than MI_MAX_ALLOC_SIZE (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` - const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check + const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size check // try first if there happens to be a small block available with just the right alignment if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) { @@ -140,15 +147,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { if (alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL; - #if !MI_PADDING - // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`) - if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX) - #else - // with padding, we can only guarantee this for fixed alignments - if mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2))) - && size <= MI_SMALL_SIZE_MAX) - #endif - 
{ + if (size <= MI_SMALL_SIZE_MAX && mi_is_naturally_aligned(size,alignment)) { // fast path for common alignment and size return mi_heap_malloc_small(heap, size); } diff --git a/test/test-api.c b/test/test-api.c index 75955c49..34bfa0e6 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -230,6 +230,23 @@ int main(void) { result = (((uintptr_t)p % 0x100) == 0); // #602 mi_free(p); } + CHECK_BODY("mimalloc-aligned13") { + bool ok = true; + for( size_t size = 1; size <= MI_SMALL_SIZE_MAX && ok; size++ ) { + for(size_t align = 1; align <= size && ok; align *= 2 ) { + void* p = mi_malloc_aligned(size,align); + ok = (p != NULL && ((uintptr_t)p % align) == 0); + mi_free(p); + /* + if (ok && align <= size && ((size + MI_PADDING_SIZE) & (align-1)) == 0) { + size_t bsize = mi_good_size(size); + ok = (align <= bsize && ((bsize + MI_PADDING_SIZE) & (align-1)) == 0); + } + */ + } + } + result = ok; + } CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); }; From 7128db7bbaf52dea5c68e88fd4f42c6ee02b9d60 Mon Sep 17 00:00:00 2001 From: Daan Date: Sat, 11 May 2024 06:43:52 -0700 Subject: [PATCH 11/18] simplified aligned allocation; improved codegen; fix mi_good_size with padding included; add MI_MAX_ALIGN_GUARANTEE --- include/mimalloc/internal.h | 4 +- include/mimalloc/types.h | 3 ++ src/alloc-aligned.c | 75 +++++++++++++++++++------------------ src/alloc.c | 22 +++++++---- src/page-queue.c | 4 +- src/page.c | 4 +- src/segment.c | 12 +++--- 7 files changed, 69 insertions(+), 55 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index c944a126..688dba0b 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -196,7 +196,9 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); // "alloc.c" -void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from 
`_mi_malloc_generic` +void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` +void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` +void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 761c7278..bccd6115 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -200,6 +200,9 @@ typedef int32_t mi_ssize_t; #error "mimalloc internal: define more bins" #endif +// Maximum block size for which blocks are guarenteed to be block size aligned. (see `segment.c:_mi_segment_page_start`) +#define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX) + // Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments #define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index e5a42357..d80a6753 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -15,15 +15,15 @@ terms of the MIT license. A copy of the license can be found in the file // Aligned Allocation // ------------------------------------------------------ -static inline bool mi_is_naturally_aligned( size_t size, size_t alignment ) { - // objects up to `MI_MEDIUM_OBJ_SIZE_MAX` are allocated aligned to their size (see `segment.c:_mi_segment_page_start`). 
- // note: the size may not be not an actual bin-size but it turns out the test below is still correct for our - // powers of two bin spacing (see test-api.c:test-aligned13). +static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) { + // objects up to `MI_MAX_ALIGN_GUARANTEE` are allocated aligned to their size (see `segment.c:_mi_segment_page_start`). mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0)); - return (size <= MI_MEDIUM_OBJ_SIZE_MAX && alignment <= size && ((size + MI_PADDING_SIZE) & (alignment-1)) == 0); + if (alignment > size) return false; + if (alignment <= MI_MAX_ALIGN_SIZE) return true; + const size_t bsize = mi_good_size(size); + return (bsize <= MI_MAX_ALIGN_GUARANTEE && (bsize & (alignment-1)) == 0); } - // Fallback primitive aligned allocation -- split out for better codegen static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { @@ -31,10 +31,18 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); // use regular allocation if it is guaranteed to fit the alignment constraints. - if (offset == 0 && mi_is_naturally_aligned(size,alignment)) { + if (offset == 0 && mi_malloc_is_naturally_aligned(size,alignment)) { void* p = _mi_heap_malloc_zero(heap, size, zero); mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); - return p; + const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0; + if mi_likely(is_aligned_or_null) { + return p; + } + else { + // this should never happen if the `mi_malloc_is_naturally_aligned` check is correct.. 
+ mi_assert(false); + mi_free(p); + } } void* p; @@ -106,33 +114,35 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t #endif return NULL; } + + // try first if there happens to be a small block available with just the right alignment + if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) { + const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` + const size_t padsize = size + MI_PADDING_SIZE; + mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); + if mi_likely(page->free != NULL) { + const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0; + if mi_likely(is_aligned) + { + #if MI_STAT>1 + mi_heap_stat_increase(heap, malloc, size); + #endif + void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen + mi_assert_internal(p != NULL); + mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); + mi_track_malloc(p,size,zero); + return p; + } + } + } + // fallback if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) { // we don't allocate more than MI_MAX_ALLOC_SIZE (see ) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } - const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` - const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size check - - // try first if there happens to be a small block available with just the right alignment - if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) { - mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); - const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; - if mi_likely(page->free != NULL && is_aligned) - { - #if MI_STAT>1 - mi_heap_stat_increase(heap, malloc, size); - #endif - void* p = 
_mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc - mi_assert_internal(p != NULL); - mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); - mi_track_malloc(p,size,zero); - return p; - } - } - // fallback return mi_heap_malloc_zero_aligned_at_fallback(heap, size, alignment, offset, zero); } @@ -146,14 +156,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he } mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { - if (alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL; - if (size <= MI_SMALL_SIZE_MAX && mi_is_naturally_aligned(size,alignment)) { - // fast path for common alignment and size - return mi_heap_malloc_small(heap, size); - } - else { - return mi_heap_malloc_aligned_at(heap, size, alignment, 0); - } + return mi_heap_malloc_aligned_at(heap, size, alignment, 0); } // ------------------------------------------------------ diff --git a/src/alloc.c b/src/alloc.c index 32175b0c..ab30fd53 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -28,7 +28,7 @@ terms of the MIT license. A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. // Note: in release mode the (inlined) routine is about 7 instructions with a single test. 
-extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept +extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size); mi_block_t* const block = page->free; @@ -85,14 +85,14 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz #endif #if MI_PADDING // && !MI_TRACK_ENABLED - mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); - ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); + mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); + ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); #if (MI_DEBUG>=2) mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); #endif - mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess - padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); - padding->delta = (uint32_t)(delta); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess + padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); + padding->delta = (uint32_t)(delta); #if MI_PADDING_CHECK if (!mi_page_is_huge(page)) { uint8_t* fill = (uint8_t*)padding - delta; @@ -105,6 +105,14 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz return block; } +// extra entries for improved efficiency in `alloc-aligned.c`. 
+extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { + return _mi_page_malloc_zero(heap,page,size,false); +} +extern inline void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { + return _mi_page_malloc_zero(heap,page,size,true); +} + static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { mi_assert(heap != NULL); #if MI_DEBUG @@ -117,7 +125,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, #endif mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE); - void* const p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero); + void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero); mi_track_malloc(p,size,zero); #if MI_STAT>1 diff --git a/src/page-queue.c b/src/page-queue.c index e4bfde14..02a8008d 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -113,10 +113,10 @@ size_t _mi_bin_size(uint8_t bin) { // Good size for allocation size_t mi_good_size(size_t size) mi_attr_noexcept { if (size <= MI_LARGE_OBJ_SIZE_MAX) { - return _mi_bin_size(mi_bin(size)); + return _mi_bin_size(mi_bin(size + MI_PADDING_SIZE)); } else { - return _mi_align_up(size,_mi_os_page_size()); + return _mi_align_up(size + MI_PADDING_SIZE,_mi_os_page_size()); } } diff --git a/src/page.c b/src/page.c index 7e188522..5a18b780 100644 --- a/src/page.c +++ b/src/page.c @@ -914,12 +914,12 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc) if mi_unlikely(zero && page->block_size == 0) { // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case. 
- void* p = _mi_page_malloc(heap, page, size, false); + void* p = _mi_page_malloc(heap, page, size); mi_assert_internal(p != NULL); _mi_memzero_aligned(p, mi_page_usable_block_size(page)); return p; } else { - return _mi_page_malloc(heap, page, size, zero); + return _mi_page_malloc_zero(heap, page, size, zero); } } diff --git a/src/segment.c b/src/segment.c index b3fc60ee..cfd6c1a3 100644 --- a/src/segment.c +++ b/src/segment.c @@ -426,15 +426,13 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa size_t psize; uint8_t* p = mi_segment_raw_page_start(segment, page, &psize); const size_t block_size = mi_page_block_size(page); - if (page->segment_idx == 0 && block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { + if (/*page->segment_idx == 0 &&*/ block_size > 0 && block_size <= MI_MAX_ALIGN_GUARANTEE) { // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); size_t adjust = block_size - ((uintptr_t)p % block_size); - if (psize - adjust >= block_size) { - if (adjust < block_size) { - p += adjust; - psize -= adjust; - // if (pre_size != NULL) *pre_size = adjust; - } + if (adjust < block_size && psize >= block_size + adjust) { + p += adjust; + psize -= adjust; mi_assert_internal((uintptr_t)p % block_size == 0); } } From 0dcdc55bbd47e055a0402c7e26cfbbb884e4156d Mon Sep 17 00:00:00 2001 From: Daan Date: Sat, 11 May 2024 07:09:30 -0700 Subject: [PATCH 12/18] better aligned test --- test/test-api.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/test-api.c b/test/test-api.c index 34bfa0e6..76101980 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -232,15 +232,20 @@ int main(void) { } CHECK_BODY("mimalloc-aligned13") { bool ok = true; - for( size_t size = 1; size <= MI_SMALL_SIZE_MAX && ok; size++ ) { + for( size_t size = 1; size <= (MI_SMALL_SIZE_MAX * 2) && ok; size++ ) { 
for(size_t align = 1; align <= size && ok; align *= 2 ) { - void* p = mi_malloc_aligned(size,align); - ok = (p != NULL && ((uintptr_t)p % align) == 0); - mi_free(p); + void* p[10]; + for(int i = 0; i < 10 && ok; i++) { + p[i] = mi_malloc_aligned(size,align);; + ok = (p[i] != NULL && ((uintptr_t)(p[i]) % align) == 0); + } + for(int i = 0; i < 10 && ok; i++) { + mi_free(p[i]); + } /* if (ok && align <= size && ((size + MI_PADDING_SIZE) & (align-1)) == 0) { size_t bsize = mi_good_size(size); - ok = (align <= bsize && ((bsize + MI_PADDING_SIZE) & (align-1)) == 0); + ok = (align <= bsize && (bsize & (align-1)) == 0); } */ } From b1ec1d5e5a1120cb2210f71e7276fcb678c4123e Mon Sep 17 00:00:00 2001 From: Daan Date: Sat, 11 May 2024 07:22:56 -0700 Subject: [PATCH 13/18] refactor aligned allocation --- include/mimalloc/types.h | 4 +-- src/alloc-aligned.c | 62 ++++++++++++++++++++++++---------------- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index bccd6115..35a3965e 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -200,13 +200,13 @@ typedef int32_t mi_ssize_t; #error "mimalloc internal: define more bins" #endif -// Maximum block size for which blocks are guarenteed to be block size aligned. (see `segment.c:_mi_segment_page_start`) +// Maximum block size for which blocks are guaranteed to be block size aligned. 
(see `segment.c:_mi_segment_page_start`) #define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX) // Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments #define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) -// we never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) +// We never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) #define MI_MAX_ALLOC_SIZE PTRDIFF_MAX // ------------------------------------------------------ diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index d80a6753..20c36044 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -24,27 +24,12 @@ static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) { return (bsize <= MI_MAX_ALIGN_GUARANTEE && (bsize & (alignment-1)) == 0); } -// Fallback primitive aligned allocation -- split out for better codegen -static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept +// Fallback aligned allocation that over-allocates -- split out for better codegen +static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { mi_assert_internal(size <= (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)); mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); - // use regular allocation if it is guaranteed to fit the alignment constraints. - if (offset == 0 && mi_malloc_is_naturally_aligned(size,alignment)) { - void* p = _mi_heap_malloc_zero(heap, size, zero); - mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); - const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0; - if mi_likely(is_aligned_or_null) { - return p; - } - else { - // this should never happen if the `mi_malloc_is_naturally_aligned` check is correct.. 
- mi_assert(false); - mi_free(p); - } - } - void* p; size_t oversize; if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) { @@ -104,6 +89,39 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* return aligned_p; } +// Generic primitive aligned allocation -- split out for better codegen +static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_generic(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept +{ + mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); + // we don't allocate more than MI_MAX_ALLOC_SIZE (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) + if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) { + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); + #endif + return NULL; + } + + // use regular allocation if it is guaranteed to fit the alignment constraints. + // this is important to try as the fast path in `mi_heap_malloc_zero_aligned` only works when there exists + // a page with the right block size, and if we always use the over-alloc fallback that would never happen. + if (offset == 0 && mi_malloc_is_naturally_aligned(size,alignment)) { + void* p = _mi_heap_malloc_zero(heap, size, zero); + mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); + const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0; + if mi_likely(is_aligned_or_null) { + return p; + } + else { + // this should never happen if the `mi_malloc_is_naturally_aligned` check is correct.. 
+ mi_assert(false); + mi_free(p); + } + } + + // fall back to over-allocation + return mi_heap_malloc_zero_aligned_at_overalloc(heap,size,alignment,offset,zero); +} + // Primitive aligned allocation static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { @@ -136,14 +154,8 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t } } - // fallback - if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) { // we don't allocate more than MI_MAX_ALLOC_SIZE (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) - #if MI_DEBUG > 0 - _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); - #endif - return NULL; - } - return mi_heap_malloc_zero_aligned_at_fallback(heap, size, alignment, offset, zero); + // fallback to generic aligned allocation + return mi_heap_malloc_zero_aligned_at_generic(heap, size, alignment, offset, zero); } From 6c7cda592c2191c75ed53e940678e5d25ec1adb2 Mon Sep 17 00:00:00 2001 From: Daan Date: Sat, 11 May 2024 07:39:06 -0700 Subject: [PATCH 14/18] make page_malloc_zero externals not inline to avoid link errors in C++ mode --- src/alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index ab30fd53..ce24b8ec 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -106,10 +106,10 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_ } // extra entries for improved efficiency in `alloc-aligned.c`. 
-extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { +extern void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { return _mi_page_malloc_zero(heap,page,size,false); } -extern inline void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { +extern void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { return _mi_page_malloc_zero(heap,page,size,true); } From bb3802801cc95fac50a969d05f554d9737429e08 Mon Sep 17 00:00:00 2001 From: Daan Date: Sat, 11 May 2024 07:53:12 -0700 Subject: [PATCH 15/18] clarify parameters for emmalloc_memalign --- src/prim/emscripten/prim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c index 8b011b4d..1f60a1bb 100644 --- a/src/prim/emscripten/prim.c +++ b/src/prim/emscripten/prim.c @@ -68,7 +68,7 @@ int _mi_prim_free(void* addr, size_t size) { // Allocation //--------------------------------------------- -extern void* emmalloc_memalign(size_t, size_t); +extern void* emmalloc_memalign(size_t alignment, size_t size); // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. 
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { From 0f6d8293c74796fa913e4b5eb4361f1e4734f7c6 Mon Sep 17 00:00:00 2001 From: Daan Date: Sat, 11 May 2024 08:08:03 -0700 Subject: [PATCH 16/18] use local dynamic tls for static MUSL builds (see issue #644) --- CMakeLists.txt | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a6c95dc4..2cc2fc46 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,6 +58,9 @@ set(mi_sources src/prim/prim.c) set(mi_cflags "") +set(mi_cflags_static "") # extra flags for a static library build +set(mi_cflags_dynamic "") # extra flags for a shared-object library build +set(mi_defines "") set(mi_libraries "") # ----------------------------------------------------------------------------- @@ -288,8 +291,7 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android") endif() if(MI_LIBC_MUSL) - message(STATUS "Assume using musl libc (MI_LIBC_MUSL=ON) (this implies MI_LOCAL_DYNAMIC_TLS=ON)") - set(MI_LOCAL_DYNAMIC_TLS "ON") + message(STATUS "Assume using musl libc (MI_LIBC_MUSL=ON)") list(APPEND mi_defines MI_LIBC_MUSL=1) endif() @@ -318,7 +320,14 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM if(MI_LOCAL_DYNAMIC_TLS) list(APPEND mi_cflags -ftls-model=local-dynamic) else() - list(APPEND mi_cflags -ftls-model=initial-exec) + if(MI_LIBC_MUSL) + # with musl we use local-dynamic for the static build, see issue #644 + list(APPEND mi_cflags_static -ftls-model=local-dynamic) + list(APPEND mi_cflags_dynamic -ftls-model=initial-exec) + message(STATUS "Use local dynamic TLS for the static build (since MI_LIBC_MUSL=ON)") + else() + list(APPEND mi_cflags -ftls-model=initial-exec) + endif() endif() if(MI_OVERRIDE) list(APPEND mi_cflags -fno-builtin-malloc) @@ -426,7 +435,7 @@ if(MI_BUILD_SHARED) add_library(mimalloc SHARED ${mi_sources}) set_target_properties(mimalloc 
PROPERTIES VERSION ${mi_version} SOVERSION ${mi_version_major} OUTPUT_NAME ${mi_basename} ) target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT) - target_compile_options(mimalloc PRIVATE ${mi_cflags}) + target_compile_options(mimalloc PRIVATE ${mi_cflags} ${mi_cflags_dynamic}) target_link_libraries(mimalloc PRIVATE ${mi_libraries}) target_include_directories(mimalloc PUBLIC $ @@ -456,7 +465,7 @@ if (MI_BUILD_STATIC) add_library(mimalloc-static STATIC ${mi_sources}) set_property(TARGET mimalloc-static PROPERTY POSITION_INDEPENDENT_CODE ON) target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) - target_compile_options(mimalloc-static PRIVATE ${mi_cflags}) + target_compile_options(mimalloc-static PRIVATE ${mi_cflags} ${mi_cflags_static}) target_link_libraries(mimalloc-static PRIVATE ${mi_libraries}) target_include_directories(mimalloc-static PUBLIC $ @@ -488,7 +497,7 @@ if (MI_BUILD_OBJECT) add_library(mimalloc-obj OBJECT src/static.c) set_property(TARGET mimalloc-obj PROPERTY POSITION_INDEPENDENT_CODE ON) target_compile_definitions(mimalloc-obj PRIVATE ${mi_defines}) - target_compile_options(mimalloc-obj PRIVATE ${mi_cflags}) + target_compile_options(mimalloc-obj PRIVATE ${mi_cflags} ${mi_cflags_static}) target_include_directories(mimalloc-obj PUBLIC $ $ From 13e519202a678a9cb698be9fd38bf28d3c3b9143 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 13 May 2024 09:02:40 -0700 Subject: [PATCH 17/18] fix comments; nicer strdup/strndup --- include/mimalloc/prim.h | 10 +++++----- src/alloc.c | 18 ++++++++---------- src/prim/windows/prim.c | 2 +- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 4ee6d43f..4d813b7f 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -14,17 +14,17 @@ terms of the MIT license. 
A copy of the license can be found in the file // Each OS/host needs to implement these primitives, see `src/prim` // for implementations on Window, macOS, WASI, and Linux/Unix. // -// note: on all primitive functions, we always have result parameters != NUL, and: +// note: on all primitive functions, we always have result parameters != NULL, and: // addr != NULL and page aligned // size > 0 and page aligned -// return value is an error code an int where 0 is success. +// the return value is an error code as an `int` where 0 is success // -------------------------------------------------------------------------- // OS memory configuration typedef struct mi_os_mem_config_s { - size_t page_size; // 4KiB - size_t large_page_size; // 2MiB - size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) + size_t page_size; // default to 4KiB + size_t large_page_size; // 0 if not supported, usually 2MiB (4MiB on Windows) + size_t alloc_granularity; // smallest allocation size (usually 4KiB, on Windows 64KiB) bool has_overcommit; // can we reserve more memory than can be actually committed? bool must_free_whole; // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc) bool has_virtual_reserve; // supports virtual address space reservation? 
(if true we can reserve virtual address space without using commit or physical memory) diff --git a/src/alloc.c b/src/alloc.c index ce24b8ec..6c9c5baf 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -329,11 +329,11 @@ mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_ // `strdup` using mi_malloc mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { if (s == NULL) return NULL; - size_t n = strlen(s); - char* t = (char*)mi_heap_malloc(heap,n+1); + size_t len = _mi_strlen(s); + char* t = (char*)mi_heap_malloc(heap,len+1); if (t == NULL) return NULL; - _mi_memcpy(t, s, n); - t[n] = 0; + _mi_memcpy(t, s, len); + t[len] = 0; return t; } @@ -344,13 +344,11 @@ mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexce // `strndup` using mi_malloc mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { if (s == NULL) return NULL; - const char* end = (const char*)memchr(s, 0, n); // find end of string in the first `n` characters (returns NULL if not found) - const size_t m = (end != NULL ? (size_t)(end - s) : n); // `m` is the minimum of `n` or the end-of-string - mi_assert_internal(m <= n); - char* t = (char*)mi_heap_malloc(heap, m+1); + const size_t len = _mi_strnlen(s,n); // len <= n + char* t = (char*)mi_heap_malloc(heap, len+1); if (t == NULL) return NULL; - _mi_memcpy(t, s, m); - t[m] = 0; + _mi_memcpy(t, s, len); + t[len] = 0; return t; } diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index e6b61079..2dd7c602 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -482,7 +482,7 @@ void _mi_prim_out_stderr( const char* msg ) // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. 
if (!_mi_preloading()) { - // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console + // _cputs(msg); // _cputs cannot be used as it aborts when failing to lock the console static HANDLE hcon = INVALID_HANDLE_VALUE; static bool hconIsConsole; if (hcon == INVALID_HANDLE_VALUE) { From eb2bab87d5d93f18eb3beb16d909ea31a40e5b33 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 13 May 2024 09:12:38 -0700 Subject: [PATCH 18/18] update readme for new release --- readme.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/readme.md b/readme.md index 05268a87..c9c2cc00 100644 --- a/readme.md +++ b/readme.md @@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.1.4` (2024-04-22). -Latest v1 tag: `v1.8.4` (2024-04-22). +Latest release tag: `v2.1.5` (2024-05-13). +Latest v1 tag: `v1.8.5` (2024-05-13). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -82,6 +82,7 @@ memory usage and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. +* 2024-05-13, `v1.8.5`, `v2.1.5`: Fix build errors on various (older) platforms. Refactored aligned allocation. * 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds. Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size directly available (and new `block_size_shift` to improve aligned block free-ing).