merge from dev-slice

This commit is contained in:
Daan 2024-05-13 09:15:44 -07:00
commit 8b15203950
17 changed files with 191 additions and 129 deletions

View file

@ -58,6 +58,9 @@ set(mi_sources
src/prim/prim.c) src/prim/prim.c)
set(mi_cflags "") set(mi_cflags "")
set(mi_cflags_static "") # extra flags for a static library build
set(mi_cflags_dynamic "") # extra flags for a shared-object library build
set(mi_defines "")
set(mi_libraries "") set(mi_libraries "")
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
@ -288,8 +291,7 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android")
endif() endif()
if(MI_LIBC_MUSL) if(MI_LIBC_MUSL)
message(STATUS "Assume using musl libc (MI_LIBC_MUSL=ON) (this implies MI_LOCAL_DYNAMIC_TLS=ON)") message(STATUS "Assume using musl libc (MI_LIBC_MUSL=ON)")
set(MI_LOCAL_DYNAMIC_TLS "ON")
list(APPEND mi_defines MI_LIBC_MUSL=1) list(APPEND mi_defines MI_LIBC_MUSL=1)
endif() endif()
@ -318,7 +320,14 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM
if(MI_LOCAL_DYNAMIC_TLS) if(MI_LOCAL_DYNAMIC_TLS)
list(APPEND mi_cflags -ftls-model=local-dynamic) list(APPEND mi_cflags -ftls-model=local-dynamic)
else() else()
list(APPEND mi_cflags -ftls-model=initial-exec) if(MI_LIBC_MUSL)
# with musl we use local-dynamic for the static build, see issue #644
list(APPEND mi_cflags_static -ftls-model=local-dynamic)
list(APPEND mi_cflags_dynamic -ftls-model=initial-exec)
message(STATUS "Use local dynamic TLS for the static build (since MI_LIBC_MUSL=ON)")
else()
list(APPEND mi_cflags -ftls-model=initial-exec)
endif()
endif() endif()
if(MI_OVERRIDE) if(MI_OVERRIDE)
list(APPEND mi_cflags -fno-builtin-malloc) list(APPEND mi_cflags -fno-builtin-malloc)
@ -426,7 +435,7 @@ if(MI_BUILD_SHARED)
add_library(mimalloc SHARED ${mi_sources}) add_library(mimalloc SHARED ${mi_sources})
set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} SOVERSION ${mi_version_major} OUTPUT_NAME ${mi_basename} ) set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} SOVERSION ${mi_version_major} OUTPUT_NAME ${mi_basename} )
target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT) target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT)
target_compile_options(mimalloc PRIVATE ${mi_cflags}) target_compile_options(mimalloc PRIVATE ${mi_cflags} ${mi_cflags_dynamic})
target_link_libraries(mimalloc PRIVATE ${mi_libraries}) target_link_libraries(mimalloc PRIVATE ${mi_libraries})
target_include_directories(mimalloc PUBLIC target_include_directories(mimalloc PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
@ -456,7 +465,7 @@ if (MI_BUILD_STATIC)
add_library(mimalloc-static STATIC ${mi_sources}) add_library(mimalloc-static STATIC ${mi_sources})
set_property(TARGET mimalloc-static PROPERTY POSITION_INDEPENDENT_CODE ON) set_property(TARGET mimalloc-static PROPERTY POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB)
target_compile_options(mimalloc-static PRIVATE ${mi_cflags}) target_compile_options(mimalloc-static PRIVATE ${mi_cflags} ${mi_cflags_static})
target_link_libraries(mimalloc-static PRIVATE ${mi_libraries}) target_link_libraries(mimalloc-static PRIVATE ${mi_libraries})
target_include_directories(mimalloc-static PUBLIC target_include_directories(mimalloc-static PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
@ -488,7 +497,7 @@ if (MI_BUILD_OBJECT)
add_library(mimalloc-obj OBJECT src/static.c) add_library(mimalloc-obj OBJECT src/static.c)
set_property(TARGET mimalloc-obj PROPERTY POSITION_INDEPENDENT_CODE ON) set_property(TARGET mimalloc-obj PROPERTY POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(mimalloc-obj PRIVATE ${mi_defines}) target_compile_definitions(mimalloc-obj PRIVATE ${mi_defines})
target_compile_options(mimalloc-obj PRIVATE ${mi_cflags}) target_compile_options(mimalloc-obj PRIVATE ${mi_cflags} ${mi_cflags_static})
target_include_directories(mimalloc-obj PUBLIC target_include_directories(mimalloc-obj PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${mi_install_incdir}> $<INSTALL_INTERFACE:${mi_install_incdir}>

View file

@ -1,6 +1,6 @@
set(mi_version_major 2) set(mi_version_major 2)
set(mi_version_minor 1) set(mi_version_minor 1)
set(mi_version_patch 4) set(mi_version_patch 5)
set(mi_version ${mi_version_major}.${mi_version_minor}) set(mi_version ${mi_version_major}.${mi_version_minor})
set(PACKAGE_VERSION ${mi_version}) set(PACKAGE_VERSION ${mi_version})

View file

@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_H #ifndef MIMALLOC_H
#define MIMALLOC_H #define MIMALLOC_H
#define MI_MALLOC_VERSION 214 // major + 2 digits minor #define MI_MALLOC_VERSION 215 // major + 2 digits minor
// ------------------------------------------------------ // ------------------------------------------------------
// Compiler specific attributes // Compiler specific attributes

View file

@ -199,7 +199,9 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start);
mi_msecs_t _mi_clock_start(void); mi_msecs_t _mi_clock_start(void);
// "alloc.c" // "alloc.c"
void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic`
void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned`
void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned`
void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned`
void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;

View file

@ -14,17 +14,17 @@ terms of the MIT license. A copy of the license can be found in the file
// Each OS/host needs to implement these primitives, see `src/prim` // Each OS/host needs to implement these primitives, see `src/prim`
// for implementations on Windows, macOS, WASI, and Linux/Unix. // for implementations on Windows, macOS, WASI, and Linux/Unix.
// //
// note: on all primitive functions, we always have result parameters != NUL, and: // note: on all primitive functions, we always have result parameters != NULL, and:
// addr != NULL and page aligned // addr != NULL and page aligned
// size > 0 and page aligned // size > 0 and page aligned
// return value is an error code an int where 0 is success. // the return value is an error code as an `int` where 0 is success
// -------------------------------------------------------------------------- // --------------------------------------------------------------------------
// OS memory configuration // OS memory configuration
typedef struct mi_os_mem_config_s { typedef struct mi_os_mem_config_s {
size_t page_size; // 4KiB size_t page_size; // default to 4KiB
size_t large_page_size; // 2MiB size_t large_page_size; // 0 if not supported, usually 2MiB (4MiB on Windows)
size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) size_t alloc_granularity; // smallest allocation size (usually 4KiB, on Windows 64KiB)
bool has_overcommit; // can we reserve more memory than can be actually committed? bool has_overcommit; // can we reserve more memory than can be actually committed?
bool must_free_whole; // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc) bool must_free_whole; // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc)
bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory) bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory)
@ -130,8 +130,9 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
// If you test on another platform and it works please send a PR :-) // If you test on another platform and it works please send a PR :-)
// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. // see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
// //
// Note: on most platforms this is not actually used anymore as we prefer `__builtin_thread_pointer()` nowadays. // Note: we would like to prefer `__builtin_thread_pointer()` nowadays instead of using assembly,
// However, we do still use it with older clang compilers and Apple OS (as we use TLS slot for the default heap there). // but unfortunately we can not detect support reliably (see issue #883)
// We also use it on Apple OS as we use a TLS slot for the default heap there.
#if defined(__GNUC__) && ( \ #if defined(__GNUC__) && ( \
(defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
|| (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__) || defined(__POWERPC__))) \ || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__) || defined(__POWERPC__))) \
@ -203,13 +204,14 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce
#endif #endif
// Do we have __builtin_thread_pointer? (do not make this a compound test as it fails on older gcc's, see issue #851) // Do we have __builtin_thread_pointer? This would be the preferred way to get a unique thread id
#if defined(__has_builtin) // but unfortunately, it seems we cannot test for this reliably at this time (see issue #883)
#if __has_builtin(__builtin_thread_pointer) // Nevertheless, it seems needed on older graviton platforms (see issue #851).
#define MI_HAS_BUILTIN_THREAD_POINTER 1 // For now, we only enable this for specific platforms.
#endif #if defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__) /* special case aarch64 for older gcc versions (issue #851) */ \
#elif defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__) // special case aarch64 for older gcc versions (issue #851) && !defined(__APPLE__) /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly (<https://github.com/microsoft/mimalloc/issues/343#issuecomment-763272369>)*/ \
#define MI_HAS_BUILTIN_THREAD_POINTER 1 && (!defined(__clang_major__) || __clang_major__ >= 14) /* older clang versions emit bad code; fall back to using the TLS slot (<https://lore.kernel.org/linux-arm-kernel/202110280952.352F66D8@keescook/T/>) */
#define MI_USE_BUILTIN_THREAD_POINTER 1
#endif #endif
@ -231,12 +233,10 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
return (uintptr_t)NtCurrentTeb(); return (uintptr_t)NtCurrentTeb();
} }
#elif MI_HAS_BUILTIN_THREAD_POINTER && \ #elif MI_USE_BUILTIN_THREAD_POINTER
(!defined(__APPLE__)) && /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly (<https://github.com/microsoft/mimalloc/issues/343#issuecomment-763272369>)*/ \
(!defined(__clang_major__) || __clang_major__ >= 14) // older clang versions emit bad code; fall back to using the TLS slot (<https://lore.kernel.org/linux-arm-kernel/202110280952.352F66D8@keescook/T/>)
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Works on most Unix based platforms // Works on most Unix based platforms with recent compilers
return (uintptr_t)__builtin_thread_pointer(); return (uintptr_t)__builtin_thread_pointer();
} }

View file

@ -203,8 +203,8 @@ typedef int32_t mi_ssize_t;
#error "mimalloc internal: define more bins" #error "mimalloc internal: define more bins"
#endif #endif
// blocks up to this size are always allocated aligned // Maximum block size for which blocks are guaranteed to be block size aligned. (see `segment.c:_mi_segment_page_start`)
#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) #define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX)
// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments // Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments
#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) #define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
@ -214,7 +214,7 @@ typedef int32_t mi_ssize_t;
// we never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) // we never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
// on 64-bit+ systems we also limit the maximum allocation size such that the slice count fits in 32-bits. (issue #877) // on 64-bit+ systems we also limit the maximum allocation size such that the slice count fits in 32-bits. (issue #877)
#if PTRDIFF_MAX >= (MI_SEGMENT_SLIZE_SIZE * UINT32_MAX) #if (PTRDIFF_MAX > INT32_MAX) && (PTRDIFF_MAX >= (MI_SEGMENT_SLIZE_SIZE * UINT32_MAX))
#define MI_MAX_ALLOC_SIZE (MI_SEGMENT_SLICE_SIZE * (UINT32_MAX-1)) #define MI_MAX_ALLOC_SIZE (MI_SEGMENT_SLICE_SIZE * (UINT32_MAX-1))
#else #else
#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX #define MI_MAX_ALLOC_SIZE PTRDIFF_MAX

View file

@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac
Initially developed by Daan Leijen for the runtime systems of the Initially developed by Daan Leijen for the runtime systems of the
[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
Latest release tag: `v2.1.4` (2024-04-22). Latest release tag: `v2.1.5` (2024-05-13).
Latest stable tag: `v1.8.4` (2024-04-22). Latest v1 tag: `v1.8.5` (2024-05-13).
mimalloc is a drop-in replacement for `malloc` and can be used in other programs mimalloc is a drop-in replacement for `malloc` and can be used in other programs
without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
@ -72,14 +72,17 @@ Enjoy!
* `master`: latest stable release (based on `dev-slice`). * `master`: latest stable release (based on `dev-slice`).
* `dev`: development branch for mimalloc v1. Use this branch for submitting PR's. * `dev`: development branch for mimalloc v1. Use this branch for submitting PR's.
* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev`. * `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for
`src/segment.c`)
### Releases ### Releases
Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to reduce memory usage Note: the `v2.x` version has a different algorithm for managing internal mimalloc pages (as slices) that tends to reduce
memory usage
and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance
(see [below](#performance)); please report if you observe any significant performance regression. (see [below](#performance)); please report if you observe any significant performance regression.
* 2024-05-13, `v1.8.5`, `v2.1.5`: Fix build errors on various (older) platforms. Refactored aligned allocation.
* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds. * 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds.
Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size
directly available (and new `block_size_shift` to improve aligned block free-ing). directly available (and new `block_size_shift` to improve aligned block free-ing).

View file

@ -15,22 +15,21 @@ terms of the MIT license. A copy of the license can be found in the file
// Aligned Allocation // Aligned Allocation
// ------------------------------------------------------ // ------------------------------------------------------
// Fallback primitive aligned allocation -- split out for better codegen static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) {
static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept // objects up to `MI_MAX_ALIGN_GUARANTEE` are allocated aligned to their size (see `segment.c:_mi_segment_page_start`).
mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0));
if (alignment > size) return false;
if (alignment <= MI_MAX_ALIGN_SIZE) return true;
const size_t bsize = mi_good_size(size);
return (bsize <= MI_MAX_ALIGN_GUARANTEE && (bsize & (alignment-1)) == 0);
}
// Fallback aligned allocation that over-allocates -- split out for better codegen
static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
{ {
mi_assert_internal(size <= PTRDIFF_MAX); mi_assert_internal(size <= (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE));
mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)`
const size_t padsize = size + MI_PADDING_SIZE;
// use regular allocation if it is guaranteed to fit the alignment constraints
if (offset==0 && alignment<=padsize && padsize<=MI_MAX_ALIGN_GUARANTEE && (padsize&align_mask)==0) {
void* p = _mi_heap_malloc_zero(heap, size, zero);
mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
return p;
}
void* p; void* p;
size_t oversize; size_t oversize;
if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) { if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) {
@ -57,6 +56,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
} }
// .. and align within the allocation // .. and align within the allocation
const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)`
const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask; const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask;
const uintptr_t adjust = (poffset == 0 ? 0 : alignment - poffset); const uintptr_t adjust = (poffset == 0 ? 0 : alignment - poffset);
mi_assert_internal(adjust < alignment); mi_assert_internal(adjust < alignment);
@ -76,7 +76,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
// now zero the block if needed // now zero the block if needed
if (alignment > MI_BLOCK_ALIGNMENT_MAX) { if (alignment > MI_BLOCK_ALIGNMENT_MAX) {
// for the tracker, on huge aligned allocations only from the start of the large block is defined // for the tracker, on huge aligned allocations only the memory from the start of the large block is defined
mi_track_mem_undefined(aligned_p, size); mi_track_mem_undefined(aligned_p, size);
if (zero) { if (zero) {
_mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p)); _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p));
@ -89,6 +89,39 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
return aligned_p; return aligned_p;
} }
// Generic (slow path) aligned allocation -- kept out of line for better codegen of the caller.
//   heap      : heap to allocate from
//   size      : requested size in bytes
//   alignment : required alignment; must be a non-zero power of two (asserted)
//   offset    : the address `p + offset` is the one that must be aligned
//   zero      : if true the returned memory is zero-initialized
// Returns NULL when `size` exceeds `MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE`; otherwise returns
// whatever the regular allocation or the over-allocating fallback returns.
static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_generic(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
{
  mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));

  // never allocate more than MI_MAX_ALLOC_SIZE (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
  if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) {
    #if MI_DEBUG > 0
    _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
    #endif
    return NULL;
  }

  // Prefer a regular allocation whenever it is guaranteed to satisfy the alignment.
  // This matters because the fast path in `mi_heap_malloc_zero_aligned_at` only works when a page
  // with the right block size already exists; if we always over-allocated, such a page would
  // never be created.
  const bool use_regular = (offset == 0 && mi_malloc_is_naturally_aligned(size, alignment));
  if (use_regular) {
    void* const block = _mi_heap_malloc_zero(heap, size, zero);
    mi_assert_internal(block == NULL || ((uintptr_t)block % alignment) == 0);
    const uintptr_t misalignment = ((uintptr_t)block) & (alignment - 1);   // 0 for NULL as well
    if mi_likely(misalignment == 0) {
      return block;
    }
    // we only get here if the `mi_malloc_is_naturally_aligned` check is incorrect:
    // release the misaligned block and use the over-allocating path below.
    mi_assert(false);
    mi_free(block);
  }

  // fall back to over-allocation
  return mi_heap_malloc_zero_aligned_at_overalloc(heap, size, alignment, offset, zero);
}
// Primitive aligned allocation // Primitive aligned allocation
static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
{ {
@ -99,34 +132,30 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
#endif #endif
return NULL; return NULL;
} }
if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
#endif
return NULL;
}
const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)`
const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check
// try first if there happens to be a small block available with just the right alignment // try first if there happens to be a small block available with just the right alignment
if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) { if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) {
const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)`
const size_t padsize = size + MI_PADDING_SIZE;
mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize);
const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; if mi_likely(page->free != NULL) {
if mi_likely(page->free != NULL && is_aligned) const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0;
{ if mi_likely(is_aligned)
#if MI_STAT>1 {
mi_heap_stat_increase(heap, malloc, size); #if MI_STAT>1
#endif mi_heap_stat_increase(heap, malloc, size);
void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc #endif
mi_assert_internal(p != NULL); void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen
mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); mi_assert_internal(p != NULL);
mi_track_malloc(p,size,zero); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
return p; mi_track_malloc(p,size,zero);
return p;
}
} }
} }
// fallback
return mi_heap_malloc_zero_aligned_at_fallback(heap, size, alignment, offset, zero); // fallback to generic aligned allocation
return mi_heap_malloc_zero_aligned_at_generic(heap, size, alignment, offset, zero);
} }
@ -139,22 +168,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he
} }
mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL; return mi_heap_malloc_aligned_at(heap, size, alignment, 0);
#if !MI_PADDING
// without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`)
if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX)
#else
// with padding, we can only guarantee this for fixed alignments
if mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2)))
&& size <= MI_SMALL_SIZE_MAX)
#endif
{
// fast path for common alignment and size
return mi_heap_malloc_small(heap, size);
}
else {
return mi_heap_malloc_aligned_at(heap, size, alignment, 0);
}
} }
// ensure a definition is emitted // ensure a definition is emitted

View file

@ -136,8 +136,11 @@ typedef void* mi_nothrow_t;
mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize) mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize)
mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p) mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p)
// In principle we do not need to forward `strdup`/`strndup` but on some systems these do not use `malloc` internally (but a more primitive call) // In principle we do not need to forward `strdup`/`strndup` but on some systems these do not use `malloc` internally (but a more primitive call)
// We only override if `strdup` is not a macro (as on some older libc's, see issue #885)
#if !defined(strdup)
mi_decl_export char* strdup(const char* str) MI_FORWARD1(mi_strdup, str) mi_decl_export char* strdup(const char* str) MI_FORWARD1(mi_strdup, str)
#if !defined(__APPLE__) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7) #endif
#if !defined(strndup) && (!defined(__APPLE__) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7))
mi_decl_export char* strndup(const char* str, size_t n) MI_FORWARD2(mi_strndup, str, n) mi_decl_export char* strndup(const char* str, size_t n) MI_FORWARD2(mi_strndup, str, n)
#endif #endif
#endif #endif

View file

@ -28,7 +28,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Fast allocation in a page: just pop from the free list. // Fast allocation in a page: just pop from the free list.
// Fall back to generic allocation only if the list is empty. // Fall back to generic allocation only if the list is empty.
// Note: in release mode the (inlined) routine is about 7 instructions with a single test. // Note: in release mode the (inlined) routine is about 7 instructions with a single test.
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
{ {
mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size); mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size);
mi_block_t* const block = page->free; mi_block_t* const block = page->free;
@ -85,14 +85,14 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
#endif #endif
#if MI_PADDING // && !MI_TRACK_ENABLED #if MI_PADDING // && !MI_TRACK_ENABLED
mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page));
ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE));
#if (MI_DEBUG>=2) #if (MI_DEBUG>=2)
mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta));
#endif #endif
mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess
padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys));
padding->delta = (uint32_t)(delta); padding->delta = (uint32_t)(delta);
#if MI_PADDING_CHECK #if MI_PADDING_CHECK
if (!mi_page_is_huge(page)) { if (!mi_page_is_huge(page)) {
uint8_t* fill = (uint8_t*)padding - delta; uint8_t* fill = (uint8_t*)padding - delta;
@ -105,6 +105,14 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
return block; return block;
} }
// extra entries for improved efficiency in `alloc-aligned.c`.
// `_mi_page_malloc`: forwards to `_mi_page_malloc_zero` with the `zero` flag fixed to `false`;
// `heap`, `page` and `size` are passed through unchanged and the result is returned as-is.
extern void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
return _mi_page_malloc_zero(heap,page,size,false);
}
// `_mi_page_malloc_zeroed`: forwards to `_mi_page_malloc_zero` with the `zero` flag fixed to `true`;
// `heap`, `page` and `size` are passed through unchanged and the result is returned as-is.
extern void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
return _mi_page_malloc_zero(heap,page,size,true);
}
static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
mi_assert(heap != NULL); mi_assert(heap != NULL);
#if MI_DEBUG #if MI_DEBUG
@ -117,7 +125,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
#endif #endif
mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE); mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
void* const p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero); void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero);
mi_track_malloc(p,size,zero); mi_track_malloc(p,size,zero);
#if MI_STAT>1 #if MI_STAT>1
@ -321,11 +329,11 @@ mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_
// `strdup` using mi_malloc // `strdup` using mi_malloc
mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept {
if (s == NULL) return NULL; if (s == NULL) return NULL;
size_t n = strlen(s); size_t len = _mi_strlen(s);
char* t = (char*)mi_heap_malloc(heap,n+1); char* t = (char*)mi_heap_malloc(heap,len+1);
if (t == NULL) return NULL; if (t == NULL) return NULL;
_mi_memcpy(t, s, n); _mi_memcpy(t, s, len);
t[n] = 0; t[len] = 0;
return t; return t;
} }
@ -336,13 +344,11 @@ mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexce
// `strndup` using mi_malloc // `strndup` using mi_malloc
mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept {
if (s == NULL) return NULL; if (s == NULL) return NULL;
const char* end = (const char*)memchr(s, 0, n); // find end of string in the first `n` characters (returns NULL if not found) const size_t len = _mi_strnlen(s,n); // len <= n
const size_t m = (end != NULL ? (size_t)(end - s) : n); // `m` is the minimum of `n` or the end-of-string char* t = (char*)mi_heap_malloc(heap, len+1);
mi_assert_internal(m <= n);
char* t = (char*)mi_heap_malloc(heap, m+1);
if (t == NULL) return NULL; if (t == NULL) return NULL;
_mi_memcpy(t, s, m); _mi_memcpy(t, s, len);
t[m] = 0; t[len] = 0;
return t; return t;
} }

View file

@ -113,10 +113,10 @@ size_t _mi_bin_size(uint8_t bin) {
// Good size for allocation // Good size for allocation
size_t mi_good_size(size_t size) mi_attr_noexcept { size_t mi_good_size(size_t size) mi_attr_noexcept {
if (size <= MI_MEDIUM_OBJ_SIZE_MAX) { if (size <= MI_MEDIUM_OBJ_SIZE_MAX) {
return _mi_bin_size(mi_bin(size)); return _mi_bin_size(mi_bin(size + MI_PADDING_SIZE));
} }
else { else {
return _mi_align_up(size,_mi_os_page_size()); return _mi_align_up(size + MI_PADDING_SIZE,_mi_os_page_size());
} }
} }

View file

@ -932,12 +932,12 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
// and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc) // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
if mi_unlikely(zero && page->block_size == 0) { if mi_unlikely(zero && page->block_size == 0) {
// note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case. // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case.
void* p = _mi_page_malloc(heap, page, size, false); void* p = _mi_page_malloc(heap, page, size);
mi_assert_internal(p != NULL); mi_assert_internal(p != NULL);
_mi_memzero_aligned(p, mi_page_usable_block_size(page)); _mi_memzero_aligned(p, mi_page_usable_block_size(page));
return p; return p;
} }
else { else {
return _mi_page_malloc(heap, page, size, zero); return _mi_page_malloc_zero(heap, page, size, zero);
} }
} }

View file

@ -68,7 +68,7 @@ int _mi_prim_free(void* addr, size_t size) {
// Allocation // Allocation
//--------------------------------------------- //---------------------------------------------
extern void* emmalloc_memalign(size_t, size_t); extern void* emmalloc_memalign(size_t alignment, size_t size);
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
@ -78,17 +78,10 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la
// That assumes no one else uses sbrk but us (they could go up, // That assumes no one else uses sbrk but us (they could go up,
// scribble, and then down), but we could assert on that perhaps. // scribble, and then down), but we could assert on that perhaps.
*is_zero = false; *is_zero = false;
// emmalloc has some limitations on alignment size. // emmalloc has a minimum alignment size.
// TODO: Why does mimalloc ask for an align of 4MB? that ends up allocating
// 8, which wastes quite a lot for us in wasm. If that is unavoidable,
// we may want to improve emmalloc to support such alignment. See also
// https://github.com/emscripten-core/emscripten/issues/20645
#define MIN_EMMALLOC_ALIGN 8 #define MIN_EMMALLOC_ALIGN 8
#define MAX_EMMALLOC_ALIGN (1024*1024)
if (try_alignment < MIN_EMMALLOC_ALIGN) { if (try_alignment < MIN_EMMALLOC_ALIGN) {
try_alignment = MIN_EMMALLOC_ALIGN; try_alignment = MIN_EMMALLOC_ALIGN;
} else if (try_alignment > MAX_EMMALLOC_ALIGN) {
try_alignment = MAX_EMMALLOC_ALIGN;
} }
void* p = emmalloc_memalign(try_alignment, size); void* p = emmalloc_memalign(try_alignment, size);
*addr = p; *addr = p;

View file

@ -482,7 +482,7 @@ void _mi_prim_out_stderr( const char* msg )
// on windows with redirection, the C runtime cannot handle locale dependent output // on windows with redirection, the C runtime cannot handle locale dependent output
// after the main thread closes so we use direct console output. // after the main thread closes so we use direct console output.
if (!_mi_preloading()) { if (!_mi_preloading()) {
// _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console // _cputs(msg); // _cputs cannot be used as it aborts when failing to lock the console
static HANDLE hcon = INVALID_HANDLE_VALUE; static HANDLE hcon = INVALID_HANDLE_VALUE;
static bool hconIsConsole; static bool hconIsConsole;
if (hcon == INVALID_HANDLE_VALUE) { if (hcon == INVALID_HANDLE_VALUE) {

View file

@ -16,7 +16,9 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc/internal.h" #include "mimalloc/internal.h"
#include "mimalloc/atomic.h" #include "mimalloc/atomic.h"
#if (MI_INTPTR_SIZE==8) #if (MI_INTPTR_SIZE>=8) && MI_TRACK_ASAN
#define MI_MAX_ADDRESS ((size_t)140 << 40) // 140TB (see issue #881)
#elif (MI_INTPTR_SIZE >= 8)
#define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB (to include huge page areas) #define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB (to include huge page areas)
#else #else
#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb #define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb

View file

@ -312,20 +312,28 @@ static size_t mi_segment_info_size(mi_segment_t* segment) {
return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE; return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE;
} }
static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t xblock_size, size_t* page_size) static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t block_size, size_t* page_size)
{ {
ptrdiff_t idx = slice - segment->slices; const ptrdiff_t idx = slice - segment->slices;
size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; const size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE;
uint8_t* const pstart = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE);
// make the start not OS page aligned for smaller blocks to avoid page/cache effects // make the start not OS page aligned for smaller blocks to avoid page/cache effects
// note: the offset must always be an xblock_size multiple since we assume small allocations // note: the offset must always be a block_size multiple since we assume small allocations
// are aligned (see `mi_heap_malloc_aligned`). // are aligned (see `mi_heap_malloc_aligned`).
size_t start_offset = 0; size_t start_offset = 0;
if (xblock_size >= MI_INTPTR_SIZE) { if (block_size > 0 && block_size <= MI_MAX_ALIGN_GUARANTEE) {
if (xblock_size <= 64) { start_offset = 3*xblock_size; } // for small objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore)
else if (xblock_size <= 512) { start_offset = xblock_size; } const size_t adjust = block_size - ((uintptr_t)pstart % block_size);
if (adjust < block_size && psize >= block_size + adjust) {
start_offset += adjust;
}
}
if (block_size >= MI_INTPTR_SIZE) {
if (block_size <= 64) { start_offset += 3*block_size; }
else if (block_size <= 512) { start_offset += block_size; }
} }
if (page_size != NULL) { *page_size = psize - start_offset; } if (page_size != NULL) { *page_size = psize - start_offset; }
return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); return (pstart + start_offset);
} }
// Start of the page available memory; can be used on uninitialized pages // Start of the page available memory; can be used on uninitialized pages

View file

@ -230,6 +230,28 @@ int main(void) {
result = (((uintptr_t)p % 0x100) == 0); // #602 result = (((uintptr_t)p % 0x100) == 0); // #602
mi_free(p); mi_free(p);
} }
CHECK_BODY("mimalloc-aligned13") {
bool ok = true;
for( size_t size = 1; size <= (MI_SMALL_SIZE_MAX * 2) && ok; size++ ) {
for(size_t align = 1; align <= size && ok; align *= 2 ) {
void* p[10];
for(int i = 0; i < 10 && ok; i++) {
p[i] = mi_malloc_aligned(size,align);;
ok = (p[i] != NULL && ((uintptr_t)(p[i]) % align) == 0);
}
for(int i = 0; i < 10 && ok; i++) {
mi_free(p[i]);
}
/*
if (ok && align <= size && ((size + MI_PADDING_SIZE) & (align-1)) == 0) {
size_t bsize = mi_good_size(size);
ok = (align <= bsize && (bsize & (align-1)) == 0);
}
*/
}
}
result = ok;
}
CHECK_BODY("malloc-aligned-at1") { CHECK_BODY("malloc-aligned-at1") {
void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p); void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p);
}; };