diff --git a/CMakeLists.txt b/CMakeLists.txt index a6c95dc4..2cc2fc46 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,6 +58,9 @@ set(mi_sources src/prim/prim.c) set(mi_cflags "") +set(mi_cflags_static "") # extra flags for a static library build +set(mi_cflags_dynamic "") # extra flags for a shared-object library build +set(mi_defines "") set(mi_libraries "") # ----------------------------------------------------------------------------- @@ -288,8 +291,7 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android") endif() if(MI_LIBC_MUSL) - message(STATUS "Assume using musl libc (MI_LIBC_MUSL=ON) (this implies MI_LOCAL_DYNAMIC_TLS=ON)") - set(MI_LOCAL_DYNAMIC_TLS "ON") + message(STATUS "Assume using musl libc (MI_LIBC_MUSL=ON)") list(APPEND mi_defines MI_LIBC_MUSL=1) endif() @@ -318,7 +320,14 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM if(MI_LOCAL_DYNAMIC_TLS) list(APPEND mi_cflags -ftls-model=local-dynamic) else() - list(APPEND mi_cflags -ftls-model=initial-exec) + if(MI_LIBC_MUSL) + # with musl we use local-dynamic for the static build, see issue #644 + list(APPEND mi_cflags_static -ftls-model=local-dynamic) + list(APPEND mi_cflags_dynamic -ftls-model=initial-exec) + message(STATUS "Use local dynamic TLS for the static build (since MI_LIBC_MUSL=ON)") + else() + list(APPEND mi_cflags -ftls-model=initial-exec) + endif() endif() if(MI_OVERRIDE) list(APPEND mi_cflags -fno-builtin-malloc) @@ -426,7 +435,7 @@ if(MI_BUILD_SHARED) add_library(mimalloc SHARED ${mi_sources}) set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} SOVERSION ${mi_version_major} OUTPUT_NAME ${mi_basename} ) target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT) - target_compile_options(mimalloc PRIVATE ${mi_cflags}) + target_compile_options(mimalloc PRIVATE ${mi_cflags} ${mi_cflags_dynamic}) target_link_libraries(mimalloc PRIVATE ${mi_libraries}) target_include_directories(mimalloc PUBLIC $ @@ -456,7 +465,7 @@ if (MI_BUILD_STATIC) add_library(mimalloc-static STATIC ${mi_sources}) set_property(TARGET mimalloc-static PROPERTY POSITION_INDEPENDENT_CODE ON) target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) - target_compile_options(mimalloc-static PRIVATE ${mi_cflags}) + target_compile_options(mimalloc-static PRIVATE ${mi_cflags} ${mi_cflags_static}) target_link_libraries(mimalloc-static PRIVATE ${mi_libraries}) target_include_directories(mimalloc-static PUBLIC $ @@ -488,7 +497,7 @@ if (MI_BUILD_OBJECT) add_library(mimalloc-obj OBJECT src/static.c) set_property(TARGET mimalloc-obj PROPERTY POSITION_INDEPENDENT_CODE ON) target_compile_definitions(mimalloc-obj PRIVATE ${mi_defines}) - target_compile_options(mimalloc-obj PRIVATE ${mi_cflags}) + target_compile_options(mimalloc-obj PRIVATE ${mi_cflags} ${mi_cflags_static}) target_include_directories(mimalloc-obj PUBLIC $ $ diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index db881213..80158aed 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 2) set(mi_version_minor 1) -set(mi_version_patch 4) +set(mi_version_patch 5) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 1fa430d3..8f287783 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 214 // major + 2 digits minor +#define MI_MALLOC_VERSION 215 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 3f03b460..44f4cafe 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -199,7 +199,9 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); // "alloc.c" -void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` +void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` +void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` +void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index f8a40323..4d813b7f 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -14,17 +14,17 @@ terms of the MIT license. A copy of the license can be found in the file // Each OS/host needs to implement these primitives, see `src/prim` // for implementations on Window, macOS, WASI, and Linux/Unix. // -// note: on all primitive functions, we always have result parameters != NUL, and: +// note: on all primitive functions, we always have result parameters != NULL, and: // addr != NULL and page aligned // size > 0 and page aligned -// return value is an error code an int where 0 is success. +// the return value is an error code as an `int` where 0 is success // -------------------------------------------------------------------------- // OS memory configuration typedef struct mi_os_mem_config_s { - size_t page_size; // 4KiB - size_t large_page_size; // 2MiB - size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) + size_t page_size; // default to 4KiB + size_t large_page_size; // 0 if not supported, usually 2MiB (4MiB on Windows) + size_t alloc_granularity; // smallest allocation size (usually 4KiB, on Windows 64KiB) bool has_overcommit; // can we reserve more memory than can be actually committed? bool must_free_whole; // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc) bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory) @@ -130,8 +130,9 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); // If you test on another platform and it works please send a PR :-) // see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. // -// Note: on most platforms this is not actually used anymore as we prefer `__builtin_thread_pointer()` nowadays. -// However, we do still use it with older clang compilers and Apple OS (as we use TLS slot for the default heap there). +// Note: we would like to prefer `__builtin_thread_pointer()` nowadays instead of using assembly, +// but unfortunately we can not detect support reliably (see issue #883) +// We also use it on Apple OS as we use a TLS slot for the default heap there. #if defined(__GNUC__) && ( \ (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__) || defined(__POWERPC__))) \ @@ -203,13 +204,14 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #endif -// Do we have __builtin_thread_pointer? (do not make this a compound test as it fails on older gcc's, see issue #851) -#if defined(__has_builtin) -#if __has_builtin(__builtin_thread_pointer) -#define MI_HAS_BUILTIN_THREAD_POINTER 1 -#endif -#elif defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__) // special case aarch64 for older gcc versions (issue #851) -#define MI_HAS_BUILTIN_THREAD_POINTER 1 +// Do we have __builtin_thread_pointer? This would be the preferred way to get a unique thread id +// but unfortunately, it seems we cannot test for this reliably at this time (see issue #883) +// Nevertheless, it seems needed on older graviton platforms (see issue #851). +// For now, we only enable this for specific platforms. +#if defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__) /* special case aarch64 for older gcc versions (issue #851) */ \ + && !defined(__APPLE__) /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly ()*/ \ + && (!defined(__clang_major__) || __clang_major__ >= 14) /* older clang versions emit bad code; fall back to using the TLS slot () */ +#define MI_USE_BUILTIN_THREAD_POINTER 1 #endif @@ -231,12 +233,10 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { return (uintptr_t)NtCurrentTeb(); } -#elif MI_HAS_BUILTIN_THREAD_POINTER && \ - (!defined(__APPLE__)) && /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly ()*/ \ - (!defined(__clang_major__) || __clang_major__ >= 14) // older clang versions emit bad code; fall back to using the TLS slot () +#elif MI_USE_BUILTIN_THREAD_POINTER static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { - // Works on most Unix based platforms + // Works on most Unix based platforms with recent compilers return (uintptr_t)__builtin_thread_pointer(); } diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 97438569..cc807ee9 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -203,8 +203,8 @@ typedef int32_t mi_ssize_t; #error "mimalloc internal: define more bins" #endif -// blocks up to this size are always allocated aligned -#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) +// Maximum block size for which blocks are guaranteed to be block size aligned. (see `segment.c:_mi_segment_page_start`) +#define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX) // Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments #define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) @@ -214,7 +214,7 @@ typedef int32_t mi_ssize_t; // we never allocate more than PTRDIFF_MAX (see also ) // on 64-bit+ systems we also limit the maximum allocation size such that the slice count fits in 32-bits. (issue #877) -#if PTRDIFF_MAX >= (MI_SEGMENT_SLIZE_SIZE * UINT32_MAX) +#if (PTRDIFF_MAX > INT32_MAX) && (PTRDIFF_MAX >= (MI_SEGMENT_SLIZE_SIZE * UINT32_MAX)) #define MI_MAX_ALLOC_SIZE (MI_SEGMENT_SLICE_SIZE * (UINT32_MAX-1)) #else #define MI_MAX_ALLOC_SIZE PTRDIFF_MAX diff --git a/readme.md b/readme.md index e87141bc..e626bee8 100644 --- a/readme.md +++ b/readme.md @@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.1.4` (2024-04-22). -Latest stable tag: `v1.8.4` (2024-04-22). +Latest release tag: `v2.1.5` (2024-05-13). +Latest v1 tag: `v1.8.5` (2024-05-13). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -72,14 +72,17 @@ Enjoy! * `master`: latest stable release (based on `dev-slice`). * `dev`: development branch for mimalloc v1. Use this branch for submitting PR's. -* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev`. +* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for +`src/segment.c`) ### Releases -Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to reduce memory usage +Note: the `v2.x` version has a different algorithm for managing internal mimalloc pages (as slices) that tends to use reduce +memory usage and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. +* 2024-05-13, `v1.8.5`, `v2.1.5`: Fix build errors on various (older) platforms. Refactored aligned allocation. * 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds. Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size directly available (and new `block_size_shift` to improve aligned block free-ing). diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index b7e589ea..ba629ef3 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -15,22 +15,21 @@ terms of the MIT license. A copy of the license can be found in the file // Aligned Allocation // ------------------------------------------------------ -// Fallback primitive aligned allocation -- split out for better codegen -static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept +static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) { + // objects up to `MI_MAX_ALIGN_GUARANTEE` are allocated aligned to their size (see `segment.c:_mi_segment_page_start`). + mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0)); + if (alignment > size) return false; + if (alignment <= MI_MAX_ALIGN_SIZE) return true; + const size_t bsize = mi_good_size(size); + return (bsize <= MI_MAX_ALIGN_GUARANTEE && (bsize & (alignment-1)) == 0); +} + +// Fallback aligned allocation that over-allocates -- split out for better codegen +static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { - mi_assert_internal(size <= PTRDIFF_MAX); + mi_assert_internal(size <= (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)); mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); - const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)` - const size_t padsize = size + MI_PADDING_SIZE; - - // use regular allocation if it is guaranteed to fit the alignment constraints - if (offset==0 && alignment<=padsize && padsize<=MI_MAX_ALIGN_GUARANTEE && (padsize&align_mask)==0) { - void* p = _mi_heap_malloc_zero(heap, size, zero); - mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); - return p; - } - void* p; size_t oversize; if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) { @@ -57,6 +56,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* } // .. and align within the allocation + const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)` const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask; const uintptr_t adjust = (poffset == 0 ? 0 : alignment - poffset); mi_assert_internal(adjust < alignment); @@ -76,7 +76,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* // now zero the block if needed if (alignment > MI_BLOCK_ALIGNMENT_MAX) { - // for the tracker, on huge aligned allocations only from the start of the large block is defined + // for the tracker, on huge aligned allocations only the memory from the start of the large block is defined mi_track_mem_undefined(aligned_p, size); if (zero) { _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p)); @@ -89,6 +89,39 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* return aligned_p; } +// Generic primitive aligned allocation -- split out for better codegen +static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_generic(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept +{ + mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); + // we don't allocate more than MI_MAX_ALLOC_SIZE (see ) + if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) { + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); + #endif + return NULL; + } + + // use regular allocation if it is guaranteed to fit the alignment constraints. + // this is important to try as the fast path in `mi_heap_malloc_zero_aligned` only works when there exist + // a page with the right block size, and if we always use the over-alloc fallback that would never happen. + if (offset == 0 && mi_malloc_is_naturally_aligned(size,alignment)) { + void* p = _mi_heap_malloc_zero(heap, size, zero); + mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); + const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0; + if mi_likely(is_aligned_or_null) { + return p; + } + else { + // this should never happen if the `mi_malloc_is_naturally_aligned` check is correct.. + mi_assert(false); + mi_free(p); + } + } + + // fall back to over-allocation + return mi_heap_malloc_zero_aligned_at_overalloc(heap,size,alignment,offset,zero); +} + // Primitive aligned allocation static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { @@ -99,34 +132,30 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t #endif return NULL; } - - if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see ) - #if MI_DEBUG > 0 - _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); - #endif - return NULL; - } - const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` - const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check - + // try first if there happens to be a small block available with just the right alignment - if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) { + if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) { + const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` + const size_t padsize = size + MI_PADDING_SIZE; mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); - const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; - if mi_likely(page->free != NULL && is_aligned) - { - #if MI_STAT>1 - mi_heap_stat_increase(heap, malloc, size); - #endif - void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc - mi_assert_internal(p != NULL); - mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); - mi_track_malloc(p,size,zero); - return p; + if mi_likely(page->free != NULL) { + const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0; + if mi_likely(is_aligned) + { + #if MI_STAT>1 + mi_heap_stat_increase(heap, malloc, size); + #endif + void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen + mi_assert_internal(p != NULL); + mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); + mi_track_malloc(p,size,zero); + return p; + } } } - // fallback - return mi_heap_malloc_zero_aligned_at_fallback(heap, size, alignment, offset, zero); + + // fallback to generic aligned allocation + return mi_heap_malloc_zero_aligned_at_generic(heap, size, alignment, offset, zero); } @@ -139,22 +168,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he } mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { - if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL; - #if !MI_PADDING - // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`) - if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX) - #else - // with padding, we can only guarantee this for fixed alignments - if mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2))) - && size <= MI_SMALL_SIZE_MAX) - #endif - { - // fast path for common alignment and size - return mi_heap_malloc_small(heap, size); - } - else { - return mi_heap_malloc_aligned_at(heap, size, alignment, 0); - } + return mi_heap_malloc_aligned_at(heap, size, alignment, 0); } // ensure a definition is emitted diff --git a/src/alloc-override.c b/src/alloc-override.c index 75afc202..12837cdd 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -136,8 +136,11 @@ typedef void* mi_nothrow_t; mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize) mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p) // In principle we do not need to forward `strdup`/`strndup` but on some systems these do not use `malloc` internally (but a more primitive call) + // We only override if `strdup` is not a macro (as on some older libc's, see issue #885) + #if !defined(strdup) mi_decl_export char* strdup(const char* str) MI_FORWARD1(mi_strdup, str) - #if !defined(__APPLE__) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7) + #endif + #if !defined(strndup) && (!defined(__APPLE__) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)) mi_decl_export char* strndup(const char* str, size_t n) MI_FORWARD2(mi_strndup, str, n) #endif #endif diff --git a/src/alloc.c b/src/alloc.c index 2a2aeb8d..86aaae75 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -28,7 +28,7 @@ terms of the MIT license. A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. // Note: in release mode the (inlined) routine is about 7 instructions with a single test. -extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept +extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size); mi_block_t* const block = page->free; @@ -85,14 +85,14 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz #endif #if MI_PADDING // && !MI_TRACK_ENABLED - mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); - ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); + mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); + ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); #if (MI_DEBUG>=2) mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); #endif - mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess - padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); - padding->delta = (uint32_t)(delta); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess + padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); + padding->delta = (uint32_t)(delta); #if MI_PADDING_CHECK if (!mi_page_is_huge(page)) { uint8_t* fill = (uint8_t*)padding - delta; @@ -105,6 +105,14 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz return block; } +// extra entries for improved efficiency in `alloc-aligned.c`. +extern void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { + return _mi_page_malloc_zero(heap,page,size,false); +} +extern void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { + return _mi_page_malloc_zero(heap,page,size,true); +} + static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { mi_assert(heap != NULL); #if MI_DEBUG @@ -117,7 +125,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, #endif mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE); - void* const p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero); + void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero); mi_track_malloc(p,size,zero); #if MI_STAT>1 @@ -321,11 +329,11 @@ mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_ // `strdup` using mi_malloc mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { if (s == NULL) return NULL; - size_t n = strlen(s); - char* t = (char*)mi_heap_malloc(heap,n+1); + size_t len = _mi_strlen(s); + char* t = (char*)mi_heap_malloc(heap,len+1); if (t == NULL) return NULL; - _mi_memcpy(t, s, n); - t[n] = 0; + _mi_memcpy(t, s, len); + t[len] = 0; return t; } @@ -336,13 +344,11 @@ mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexce // `strndup` using mi_malloc mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { if (s == NULL) return NULL; - const char* end = (const char*)memchr(s, 0, n); // find end of string in the first `n` characters (returns NULL if not found) - const size_t m = (end != NULL ? (size_t)(end - s) : n); // `m` is the minimum of `n` or the end-of-string - mi_assert_internal(m <= n); - char* t = (char*)mi_heap_malloc(heap, m+1); + const size_t len = _mi_strnlen(s,n); // len <= n + char* t = (char*)mi_heap_malloc(heap, len+1); if (t == NULL) return NULL; - _mi_memcpy(t, s, m); - t[m] = 0; + _mi_memcpy(t, s, len); + t[len] = 0; return t; } diff --git a/src/page-queue.c b/src/page-queue.c index 470d1b64..ceea91ee 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -113,10 +113,10 @@ size_t _mi_bin_size(uint8_t bin) { // Good size for allocation size_t mi_good_size(size_t size) mi_attr_noexcept { if (size <= MI_MEDIUM_OBJ_SIZE_MAX) { - return _mi_bin_size(mi_bin(size)); + return _mi_bin_size(mi_bin(size + MI_PADDING_SIZE)); } else { - return _mi_align_up(size,_mi_os_page_size()); + return _mi_align_up(size + MI_PADDING_SIZE,_mi_os_page_size()); } } diff --git a/src/page.c b/src/page.c index 2ea25469..871ed215 100644 --- a/src/page.c +++ b/src/page.c @@ -932,12 +932,12 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc) if mi_unlikely(zero && page->block_size == 0) { // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case. - void* p = _mi_page_malloc(heap, page, size, false); + void* p = _mi_page_malloc(heap, page, size); mi_assert_internal(p != NULL); _mi_memzero_aligned(p, mi_page_usable_block_size(page)); return p; } else { - return _mi_page_malloc(heap, page, size, zero); + return _mi_page_malloc_zero(heap, page, size, zero); } } diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c index c0fa0f4a..1f60a1bb 100644 --- a/src/prim/emscripten/prim.c +++ b/src/prim/emscripten/prim.c @@ -68,7 +68,7 @@ int _mi_prim_free(void* addr, size_t size) { // Allocation //--------------------------------------------- -extern void* emmalloc_memalign(size_t, size_t); +extern void* emmalloc_memalign(size_t alignment, size_t size); // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { @@ -78,17 +78,10 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la // That assumes no one else uses sbrk but us (they could go up, // scribble, and then down), but we could assert on that perhaps. *is_zero = false; - // emmalloc has some limitations on alignment size. - // TODO: Why does mimalloc ask for an align of 4MB? that ends up allocating - // 8, which wastes quite a lot for us in wasm. If that is unavoidable, - // we may want to improve emmalloc to support such alignment. See also - // https://github.com/emscripten-core/emscripten/issues/20645 + // emmalloc has a minimum alignment size. #define MIN_EMMALLOC_ALIGN 8 - #define MAX_EMMALLOC_ALIGN (1024*1024) if (try_alignment < MIN_EMMALLOC_ALIGN) { try_alignment = MIN_EMMALLOC_ALIGN; - } else if (try_alignment > MAX_EMMALLOC_ALIGN) { - try_alignment = MAX_EMMALLOC_ALIGN; } void* p = emmalloc_memalign(try_alignment, size); *addr = p; diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index e6b61079..2dd7c602 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -482,7 +482,7 @@ void _mi_prim_out_stderr( const char* msg ) // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. if (!_mi_preloading()) { - // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console + // _cputs(msg); // _cputs cannot be used as it aborts when failing to lock the console static HANDLE hcon = INVALID_HANDLE_VALUE; static bool hconIsConsole; if (hcon == INVALID_HANDLE_VALUE) { diff --git a/src/segment-map.c b/src/segment-map.c index a306ec67..1efb1e23 100644 --- a/src/segment-map.c +++ b/src/segment-map.c @@ -16,7 +16,9 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc/internal.h" #include "mimalloc/atomic.h" -#if (MI_INTPTR_SIZE==8) +#if (MI_INTPTR_SIZE>=8) && MI_TRACK_ASAN +#define MI_MAX_ADDRESS ((size_t)140 << 40) // 140TB (see issue #881) +#elif (MI_INTPTR_SIZE >= 8) #define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB (to include huge page areas) #else #define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb diff --git a/src/segment.c b/src/segment.c index f8054697..9ac22f15 100644 --- a/src/segment.c +++ b/src/segment.c @@ -312,20 +312,28 @@ static size_t mi_segment_info_size(mi_segment_t* segment) { return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; } -static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t xblock_size, size_t* page_size) +static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t block_size, size_t* page_size) { - ptrdiff_t idx = slice - segment->slices; - size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; + const ptrdiff_t idx = slice - segment->slices; + const size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; + uint8_t* const pstart = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE); // make the start not OS page aligned for smaller blocks to avoid page/cache effects - // note: the offset must always be an xblock_size multiple since we assume small allocations + // note: the offset must always be a block_size multiple since we assume small allocations // are aligned (see `mi_heap_malloc_aligned`). size_t start_offset = 0; - if (xblock_size >= MI_INTPTR_SIZE) { - if (xblock_size <= 64) { start_offset = 3*xblock_size; } - else if (xblock_size <= 512) { start_offset = xblock_size; } + if (block_size > 0 && block_size <= MI_MAX_ALIGN_GUARANTEE) { + // for small objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + const size_t adjust = block_size - ((uintptr_t)pstart % block_size); + if (adjust < block_size && psize >= block_size + adjust) { + start_offset += adjust; + } + } + if (block_size >= MI_INTPTR_SIZE) { + if (block_size <= 64) { start_offset += 3*block_size; } + else if (block_size <= 512) { start_offset += block_size; } } if (page_size != NULL) { *page_size = psize - start_offset; } - return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); + return (pstart + start_offset); } // Start of the page available memory; can be used on uninitialized pages diff --git a/test/test-api.c b/test/test-api.c index 75955c49..76101980 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -230,6 +230,28 @@ int main(void) { result = (((uintptr_t)p % 0x100) == 0); // #602 mi_free(p); } + CHECK_BODY("mimalloc-aligned13") { + bool ok = true; + for( size_t size = 1; size <= (MI_SMALL_SIZE_MAX * 2) && ok; size++ ) { + for(size_t align = 1; align <= size && ok; align *= 2 ) { + void* p[10]; + for(int i = 0; i < 10 && ok; i++) { + p[i] = mi_malloc_aligned(size,align);; + ok = (p[i] != NULL && ((uintptr_t)(p[i]) % align) == 0); + } + for(int i = 0; i < 10 && ok; i++) { + mi_free(p[i]); + } + /* + if (ok && align <= size && ((size + MI_PADDING_SIZE) & (align-1)) == 0) { + size_t bsize = mi_good_size(size); + ok = (align <= bsize && (bsize & (align-1)) == 0); + } + */ + } + } + result = ok; + } CHECK_BODY("malloc-aligned-at1") { void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); };