merge from dev-slice

Commit 8b15203950 (mirror of https://github.com/microsoft/mimalloc.git)
17 changed files with 191 additions and 129 deletions
@@ -58,6 +58,9 @@ set(mi_sources
     src/prim/prim.c)

 set(mi_cflags "")
+set(mi_cflags_static "")            # extra flags for a static library build
+set(mi_cflags_dynamic "")           # extra flags for a shared-object library build
 set(mi_defines "")
 set(mi_libraries "")

 # -----------------------------------------------------------------------------
@@ -288,8 +291,7 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android")
 endif()

 if(MI_LIBC_MUSL)
-  message(STATUS "Assume using musl libc (MI_LIBC_MUSL=ON) (this implies MI_LOCAL_DYNAMIC_TLS=ON)")
-  set(MI_LOCAL_DYNAMIC_TLS "ON")
+  message(STATUS "Assume using musl libc (MI_LIBC_MUSL=ON)")
   list(APPEND mi_defines MI_LIBC_MUSL=1)
 endif()

@@ -318,7 +320,14 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM
   if(MI_LOCAL_DYNAMIC_TLS)
     list(APPEND mi_cflags -ftls-model=local-dynamic)
   else()
-    list(APPEND mi_cflags -ftls-model=initial-exec)
+    if(MI_LIBC_MUSL)
+      # with musl we use local-dynamic for the static build, see issue #644
+      list(APPEND mi_cflags_static -ftls-model=local-dynamic)
+      list(APPEND mi_cflags_dynamic -ftls-model=initial-exec)
+      message(STATUS "Use local dynamic TLS for the static build (since MI_LIBC_MUSL=ON)")
+    else()
+      list(APPEND mi_cflags -ftls-model=initial-exec)
+    endif()
   endif()
   if(MI_OVERRIDE)
     list(APPEND mi_cflags -fno-builtin-malloc)
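A note on the `-ftls-model` values being juggled above: they choose how thread-local variables are addressed at run time; `initial-exec` is faster but assumes the library is present at program startup, while `local-dynamic` stays safe when the static archive ends up inside a dlopen'd object (the musl case in issue #644). A minimal way to observe the difference, using a sketch of my own rather than anything from this commit:

    /* tls-demo.c: compile twice and compare the generated assembly:
     *   cc -O2 -S -ftls-model=initial-exec  tls-demo.c
     *   cc -O2 -S -ftls-model=local-dynamic tls-demo.c
     */
    static __thread int tls_counter;   // one instance per thread

    int bump(void) {
      return ++tls_counter;            // the TLS access sequence differs per model
    }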
@@ -426,7 +435,7 @@ if(MI_BUILD_SHARED)
   add_library(mimalloc SHARED ${mi_sources})
   set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} SOVERSION ${mi_version_major} OUTPUT_NAME ${mi_basename} )
   target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT)
-  target_compile_options(mimalloc PRIVATE ${mi_cflags})
+  target_compile_options(mimalloc PRIVATE ${mi_cflags} ${mi_cflags_dynamic})
   target_link_libraries(mimalloc PRIVATE ${mi_libraries})
   target_include_directories(mimalloc PUBLIC
       $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
@@ -456,7 +465,7 @@ if (MI_BUILD_STATIC)
   add_library(mimalloc-static STATIC ${mi_sources})
   set_property(TARGET mimalloc-static PROPERTY POSITION_INDEPENDENT_CODE ON)
   target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB)
-  target_compile_options(mimalloc-static PRIVATE ${mi_cflags})
+  target_compile_options(mimalloc-static PRIVATE ${mi_cflags} ${mi_cflags_static})
   target_link_libraries(mimalloc-static PRIVATE ${mi_libraries})
   target_include_directories(mimalloc-static PUBLIC
       $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
@@ -488,7 +497,7 @@ if (MI_BUILD_OBJECT)
   add_library(mimalloc-obj OBJECT src/static.c)
   set_property(TARGET mimalloc-obj PROPERTY POSITION_INDEPENDENT_CODE ON)
   target_compile_definitions(mimalloc-obj PRIVATE ${mi_defines})
-  target_compile_options(mimalloc-obj PRIVATE ${mi_cflags})
+  target_compile_options(mimalloc-obj PRIVATE ${mi_cflags} ${mi_cflags_static})
   target_include_directories(mimalloc-obj PUBLIC
       $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
       $<INSTALL_INTERFACE:${mi_install_incdir}>
@@ -1,6 +1,6 @@
 set(mi_version_major 2)
 set(mi_version_minor 1)
-set(mi_version_patch 4)
+set(mi_version_patch 5)
 set(mi_version ${mi_version_major}.${mi_version_minor})

 set(PACKAGE_VERSION ${mi_version})
@@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #ifndef MIMALLOC_H
 #define MIMALLOC_H

-#define MI_MALLOC_VERSION 214   // major + 2 digits minor
+#define MI_MALLOC_VERSION 215   // major + 2 digits minor

 // ------------------------------------------------------
 // Compiler specific attributes
@@ -199,7 +199,9 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start);
 mi_msecs_t _mi_clock_start(void);

 // "alloc.c"
-void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept;  // called from `_mi_malloc_generic`
+void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept;  // called from `_mi_malloc_generic`
+void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept;                  // called from `_mi_heap_malloc_aligned`
+void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept;           // called from `_mi_heap_malloc_aligned`
 void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
 void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept;  // called from `_mi_heap_malloc_aligned`
 void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
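The rename to `_mi_page_malloc_zero` plus the two new fixed-argument entry points follow a common codegen pattern: when hot callers pass a compile-time-constant `bool`, giving each constant its own named wrapper lets the optimizer fold the branch away. A stand-alone sketch of the idea (illustrative only, not mimalloc code):

    #include <stdlib.h>
    #include <string.h>

    // Generic worker: `zero` is a run-time branch.
    static void* slot_alloc_zero(size_t size, int zero) {
      void* p = malloc(size);
      if (p != NULL && zero) { memset(p, 0, size); }
      return p;
    }

    // Fixed-argument wrappers: callers that know `zero` statically get a
    // direct call whose branch folds away once these are inlined.
    void* slot_alloc(size_t size)        { return slot_alloc_zero(size, 0); }
    void* slot_alloc_zeroed(size_t size) { return slot_alloc_zero(size, 1); }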
@@ -14,17 +14,17 @@ terms of the MIT license. A copy of the license can be found in the file
 // Each OS/host needs to implement these primitives, see `src/prim`
 // for implementations on Window, macOS, WASI, and Linux/Unix.
 //
-// note: on all primitive functions, we always have result parameters != NUL, and:
+// note: on all primitive functions, we always have result parameters != NULL, and:
 //       addr != NULL and page aligned
 //       size > 0 and page aligned
-//       return value is an error code an int where 0 is success.
+//       the return value is an error code as an `int` where 0 is success
 // --------------------------------------------------------------------------

 // OS memory configuration
 typedef struct mi_os_mem_config_s {
-  size_t  page_size;            // 4KiB
-  size_t  large_page_size;      // 2MiB
-  size_t  alloc_granularity;    // smallest allocation size (on Windows 64KiB)
+  size_t  page_size;            // default to 4KiB
+  size_t  large_page_size;      // 0 if not supported, usually 2MiB (4MiB on Windows)
+  size_t  alloc_granularity;    // smallest allocation size (usually 4KiB, on Windows 64KiB)
   bool    has_overcommit;       // can we reserve more memory than can be actually committed?
   bool    must_free_whole;      // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc)
   bool    has_virtual_reserve;  // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory)
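The corrected comment pins down the primitive contract: non-NULL result parameters, page-aligned `addr` and `size`, and an `int` error code where 0 means success. A defensive wrapper along these lines (the `prim_commit` stub is hypothetical, standing in for a real OS primitive):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    // Hypothetical stub with the documented calling convention.
    static int prim_commit(void* addr, size_t size) {
      (void)addr; (void)size;
      return 0;   // 0 == success
    }

    // Assert the preconditions the prim.h comment promises before calling.
    static int checked_commit(void* addr, size_t size, size_t page_size) {
      assert(addr != NULL && ((uintptr_t)addr % page_size) == 0);  // page aligned
      assert(size > 0 && (size % page_size) == 0);                 // non-zero, page aligned
      return prim_commit(addr, size);                              // int error code
    }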
@@ -130,8 +130,9 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
 // If you test on another platform and it works please send a PR :-)
 // see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
 //
-// Note: on most platforms this is not actually used anymore as we prefer `__builtin_thread_pointer()` nowadays.
-// However, we do still use it with older clang compilers and Apple OS (as we use TLS slot for the default heap there).
+// Note: we would like to prefer `__builtin_thread_pointer()` nowadays instead of using assembly,
+// but unfortunately we can not detect support reliably (see issue #883)
+// We also use it on Apple OS as we use a TLS slot for the default heap there.
 #if defined(__GNUC__) && ( \
        (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
     || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__) || defined(__POWERPC__))) \
@@ -203,13 +204,14 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce

 #endif

-// Do we have __builtin_thread_pointer? (do not make this a compound test as it fails on older gcc's, see issue #851)
-#if defined(__has_builtin)
-#if __has_builtin(__builtin_thread_pointer)
-#define MI_HAS_BUILTIN_THREAD_POINTER 1
-#endif
-#elif defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__)  // special case aarch64 for older gcc versions (issue #851)
-#define MI_HAS_BUILTIN_THREAD_POINTER 1
+// Do we have __builtin_thread_pointer? This would be the preferred way to get a unique thread id
+// but unfortunately, it seems we cannot test for this reliably at this time (see issue #883)
+// Nevertheless, it seems needed on older graviton platforms (see issue #851).
+// For now, we only enable this for specific platforms.
+#if defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__) /* special case aarch64 for older gcc versions (issue #851) */ \
+    && !defined(__APPLE__)  /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly (<https://github.com/microsoft/mimalloc/issues/343#issuecomment-763272369>)*/ \
+    && (!defined(__clang_major__) || __clang_major__ >= 14)  /* older clang versions emit bad code; fall back to using the TLS slot (<https://lore.kernel.org/linux-arm-kernel/202110280952.352F66D8@keescook/T/>) */
+#define MI_USE_BUILTIN_THREAD_POINTER 1
 #endif

@@ -231,12 +233,10 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
   return (uintptr_t)NtCurrentTeb();
 }

-#elif MI_HAS_BUILTIN_THREAD_POINTER && \
-      (!defined(__APPLE__)) &&  /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly (<https://github.com/microsoft/mimalloc/issues/343#issuecomment-763272369>)*/ \
-      (!defined(__clang_major__) || __clang_major__ >= 14)  // older clang versions emit bad code; fall back to using the TLS slot (<https://lore.kernel.org/linux-arm-kernel/202110280952.352F66D8@keescook/T/>)
+#elif MI_USE_BUILTIN_THREAD_POINTER

 static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
-  // Works on most Unix based platforms
+  // Works on most Unix based platforms with recent compilers
   return (uintptr_t)__builtin_thread_pointer();
 }

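Net effect of the two hunks above: the Apple and older-clang exclusions move from the use site into the single `MI_USE_BUILTIN_THREAD_POINTER` definition, so the `#elif` collapses to one line. As a stand-alone illustration of the underlying trick (a sketch under my own simplified guards, not mimalloc's exact conditions), the thread pointer register yields a cheap per-thread id, with a thread-local address as a portable fallback:

    #include <stdint.h>

    static inline uintptr_t my_thread_id(void) {
    #if defined(__GNUC__) && defined(__aarch64__) && !defined(__APPLE__)
      return (uintptr_t)__builtin_thread_pointer();  // TLS base register
    #else
      static __thread int tls_anchor;                // unique address per thread
      return (uintptr_t)&tls_anchor;
    #endif
    }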
@@ -203,8 +203,8 @@ typedef int32_t mi_ssize_t;
 #error "mimalloc internal: define more bins"
 #endif

-// blocks up to this size are always allocated aligned
-#define MI_MAX_ALIGN_GUARANTEE  (8*MI_MAX_ALIGN_SIZE)
+// Maximum block size for which blocks are guaranteed to be block size aligned. (see `segment.c:_mi_segment_page_start`)
+#define MI_MAX_ALIGN_GUARANTEE  (MI_MEDIUM_OBJ_SIZE_MAX)

 // Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments
 #define MI_BLOCK_ALIGNMENT_MAX  (MI_SEGMENT_SIZE >> 1)
@@ -214,7 +214,7 @@ typedef int32_t mi_ssize_t;

 // we never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
 // on 64-bit+ systems we also limit the maximum allocation size such that the slice count fits in 32-bits. (issue #877)
-#if PTRDIFF_MAX >= (MI_SEGMENT_SLIZE_SIZE * UINT32_MAX)
+#if (PTRDIFF_MAX > INT32_MAX) && (PTRDIFF_MAX >= (MI_SEGMENT_SLIZE_SIZE * UINT32_MAX))
 #define MI_MAX_ALLOC_SIZE  (MI_SEGMENT_SLICE_SIZE * (UINT32_MAX-1))
 #else
 #define MI_MAX_ALLOC_SIZE  PTRDIFF_MAX
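To put a number on the 32-bit slice-count cap: assuming the default v2.x slice size of 64 KiB (2^16 bytes), the capped maximum works out to 2^16 * (2^32 - 2) bytes, just under 2^48 bytes (256 TiB), comfortably below PTRDIFF_MAX on 64-bit targets. Checking the arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      // Assumed v2.x default on 64-bit: 64 KiB slices.
      const uint64_t slice_size = UINT64_C(1) << 16;
      const uint64_t max_alloc  = slice_size * (UINT32_MAX - UINT64_C(1));
      printf("max alloc ~= %llu TiB\n",
             (unsigned long long)(max_alloc >> 40));  // prints 255
      return 0;
    }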
readme.md
@@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac
 Initially developed by Daan Leijen for the runtime systems of the
 [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.

-Latest release tag: `v2.1.4` (2024-04-22).
-Latest stable tag: `v1.8.4` (2024-04-22).
+Latest release tag: `v2.1.5` (2024-05-13).
+Latest v1 tag: `v1.8.5` (2024-05-13).

 mimalloc is a drop-in replacement for `malloc` and can be used in other programs
 without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
@@ -72,14 +72,17 @@ Enjoy!

 * `master`: latest stable release (based on `dev-slice`).
 * `dev`: development branch for mimalloc v1. Use this branch for submitting PR's.
-* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev`.
+* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for
+  `src/segment.c`)

 ### Releases

-Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to reduce memory usage
+Note: the `v2.x` version has a different algorithm for managing internal mimalloc pages (as slices) that tends to use reduce
+memory usage
 and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance
 (see [below](#performance)); please report if you observe any significant performance regression.

+* 2024-05-13, `v1.8.5`, `v2.1.5`: Fix build errors on various (older) platforms. Refactored aligned allocation.
 * 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds.
   Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size
   directly available (and new `block_size_shift` to improve aligned block free-ing).
@@ -15,22 +15,21 @@ terms of the MIT license. A copy of the license can be found in the file
 // Aligned Allocation
 // ------------------------------------------------------

-// Fallback primitive aligned allocation -- split out for better codegen
-static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
+static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) {
+  // objects up to `MI_MAX_ALIGN_GUARANTEE` are allocated aligned to their size (see `segment.c:_mi_segment_page_start`).
+  mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0));
+  if (alignment > size) return false;
+  if (alignment <= MI_MAX_ALIGN_SIZE) return true;
+  const size_t bsize = mi_good_size(size);
+  return (bsize <= MI_MAX_ALIGN_GUARANTEE && (bsize & (alignment-1)) == 0);
+}
+
+// Fallback aligned allocation that over-allocates -- split out for better codegen
+static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
 {
-  mi_assert_internal(size <= PTRDIFF_MAX);
+  mi_assert_internal(size <= (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE));
   mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));

-  const uintptr_t align_mask = alignment - 1;  // for any x, `(x & align_mask) == (x % alignment)`
-  const size_t padsize = size + MI_PADDING_SIZE;
-
-  // use regular allocation if it is guaranteed to fit the alignment constraints
-  if (offset==0 && alignment<=padsize && padsize<=MI_MAX_ALIGN_GUARANTEE && (padsize&align_mask)==0) {
-    void* p = _mi_heap_malloc_zero(heap, size, zero);
-    mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
-    return p;
-  }
-
   void* p;
   size_t oversize;
   if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) {
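The new `mi_malloc_is_naturally_aligned` leans on the size-class layout: a block whose rounded ("good") size is a multiple of the requested power-of-two alignment comes back aligned for free, so no over-allocation is needed. A simplified restatement with assumed bin sizes (8, 16, ..., 80, 96, ...; `MI_MAX_ALIGN_SIZE` taken as 16):

    #include <stdbool.h>
    #include <stddef.h>

    // Simplified: `good_size` is the bin size the request rounds up to.
    static bool is_naturally_aligned(size_t good_size, size_t alignment) {
      if (alignment <= 16) return true;            // covered by MI_MAX_ALIGN_SIZE
      return (good_size & (alignment - 1)) == 0;   // bin size is a multiple of alignment
    }
    // is_naturally_aligned(96, 32) -> true : a 96-byte bin block is 32-aligned
    // is_naturally_aligned(80, 32) -> false: 80 % 32 != 0, so over-allocate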
@@ -57,6 +56,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
   }

   // .. and align within the allocation
+  const uintptr_t align_mask = alignment - 1;  // for any x, `(x & align_mask) == (x % alignment)`
   const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask;
   const uintptr_t adjust  = (poffset == 0 ? 0 : alignment - poffset);
   mi_assert_internal(adjust < alignment);
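The `poffset`/`adjust` pair is the classic over-allocate-then-align step. With example numbers: if the raw pointer is 0x1008, the offset 0, and the alignment 64, then `poffset` is 8 and `adjust` is 56, landing the aligned pointer at 0x1040. As a compilable sketch:

    #include <assert.h>
    #include <stdint.h>

    // Align (p + offset) up to a power-of-two `alignment` inside an
    // over-allocated block; mirrors the poffset/adjust lines above.
    static uintptr_t align_within(uintptr_t p, uintptr_t offset, uintptr_t alignment) {
      const uintptr_t align_mask = alignment - 1;
      const uintptr_t poffset = (p + offset) & align_mask;
      const uintptr_t adjust  = (poffset == 0 ? 0 : alignment - poffset);
      assert(adjust < alignment);
      return p + adjust;   // (p + adjust + offset) is now alignment-aligned
    }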
@@ -76,7 +76,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*

   // now zero the block if needed
   if (alignment > MI_BLOCK_ALIGNMENT_MAX) {
-    // for the tracker, on huge aligned allocations only from the start of the large block is defined
+    // for the tracker, on huge aligned allocations only the memory from the start of the large block is defined
     mi_track_mem_undefined(aligned_p, size);
     if (zero) {
       _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p));
@@ -89,6 +89,39 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
   return aligned_p;
 }

+// Generic primitive aligned allocation -- split out for better codegen
+static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_generic(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
+{
+  mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
+  // we don't allocate more than MI_MAX_ALLOC_SIZE (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
+  if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) {
+    #if MI_DEBUG > 0
+    _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
+    #endif
+    return NULL;
+  }
+
+  // use regular allocation if it is guaranteed to fit the alignment constraints.
+  // this is important to try as the fast path in `mi_heap_malloc_zero_aligned` only works when there exist
+  // a page with the right block size, and if we always use the over-alloc fallback that would never happen.
+  if (offset == 0 && mi_malloc_is_naturally_aligned(size,alignment)) {
+    void* p = _mi_heap_malloc_zero(heap, size, zero);
+    mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
+    const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0;
+    if mi_likely(is_aligned_or_null) {
+      return p;
+    }
+    else {
+      // this should never happen if the `mi_malloc_is_naturally_aligned` check is correct..
+      mi_assert(false);
+      mi_free(p);
+    }
+  }
+
+  // fall back to over-allocation
+  return mi_heap_malloc_zero_aligned_at_overalloc(heap,size,alignment,offset,zero);
+}
+
 // Primitive aligned allocation
 static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
 {
@@ -99,34 +132,30 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
     #endif
     return NULL;
   }

-  if mi_unlikely(size > PTRDIFF_MAX) {  // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
-    #if MI_DEBUG > 0
-    _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
-    #endif
-    return NULL;
-  }
-  const uintptr_t align_mask = alignment-1;  // for any x, `(x & align_mask) == (x % alignment)`
-  const size_t padsize = size + MI_PADDING_SIZE;  // note: cannot overflow due to earlier size > PTRDIFF_MAX check
-
   // try first if there happens to be a small block available with just the right alignment
-  if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) {
+  if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) {
+    const uintptr_t align_mask = alignment-1;  // for any x, `(x & align_mask) == (x % alignment)`
+    const size_t padsize = size + MI_PADDING_SIZE;
     mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize);
-    const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0;
-    if mi_likely(page->free != NULL && is_aligned)
-    {
-      #if MI_STAT>1
-      mi_heap_stat_increase(heap, malloc, size);
-      #endif
-      void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc
-      mi_assert_internal(p != NULL);
-      mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
-      mi_track_malloc(p,size,zero);
-      return p;
+    if mi_likely(page->free != NULL) {
+      const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0;
+      if mi_likely(is_aligned)
+      {
+        #if MI_STAT>1
+        mi_heap_stat_increase(heap, malloc, size);
+        #endif
+        void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen
+        mi_assert_internal(p != NULL);
+        mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
+        mi_track_malloc(p,size,zero);
+        return p;
+      }
     }
   }
-  // fallback
-  return mi_heap_malloc_zero_aligned_at_fallback(heap, size, alignment, offset, zero);
+
+  // fallback to generic aligned allocation
+  return mi_heap_malloc_zero_aligned_at_generic(heap, size, alignment, offset, zero);
 }

@@ -139,22 +168,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he
 }

 mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
   if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL;
-  #if !MI_PADDING
-  // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`)
-  if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX)
-  #else
-  // with padding, we can only guarantee this for fixed alignments
-  if mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2)))
-                && size <= MI_SMALL_SIZE_MAX)
-  #endif
-  {
-    // fast path for common alignment and size
-    return mi_heap_malloc_small(heap, size);
-  }
-  else {
-    return mi_heap_malloc_aligned_at(heap, size, alignment, 0);
-  }
+  return mi_heap_malloc_aligned_at(heap, size, alignment, 0);
 }

 // ensure a definition is emitted
@@ -136,8 +136,11 @@ typedef void* mi_nothrow_t;
   mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize)
   mi_decl_export void  free(void* p)                    MI_FORWARD0(mi_free, p)
   // In principle we do not need to forward `strdup`/`strndup` but on some systems these do not use `malloc` internally (but a more primitive call)
+  // We only override if `strdup` is not a macro (as on some older libc's, see issue #885)
+  #if !defined(strdup)
   mi_decl_export char* strdup(const char* str)             MI_FORWARD1(mi_strdup, str)
-  #if !defined(__APPLE__) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)
+  #endif
+  #if !defined(strndup) && (!defined(__APPLE__) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7))
   mi_decl_export char* strndup(const char* str, size_t n)  MI_FORWARD2(mi_strndup, str, n)
   #endif
 #endif
src/alloc.c
@@ -28,7 +28,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // Fast allocation in a page: just pop from the free list.
 // Fall back to generic allocation only if the list is empty.
 // Note: in release mode the (inlined) routine is about 7 instructions with a single test.
-extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
+extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
 {
   mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size);
   mi_block_t* const block = page->free;
@@ -85,14 +85,14 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
   #endif

   #if MI_PADDING // && !MI_TRACK_ENABLED
-  mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page));
-  ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE));
+  mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page));
+  ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE));
   #if (MI_DEBUG>=2)
   mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta));
   #endif
-  mi_track_mem_defined(padding,sizeof(mi_padding_t));  // note: re-enable since mi_page_usable_block_size may set noaccess
-  padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys));
-  padding->delta  = (uint32_t)(delta);
+  mi_track_mem_defined(padding,sizeof(mi_padding_t));  // note: re-enable since mi_page_usable_block_size may set noaccess
+  padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys));
+  padding->delta  = (uint32_t)(delta);
   #if MI_PADDING_CHECK
   if (!mi_page_is_huge(page)) {
     uint8_t* fill = (uint8_t*)padding - delta;
@@ -105,6 +105,14 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
   return block;
 }

+// extra entries for improved efficiency in `alloc-aligned.c`.
+extern void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
+  return _mi_page_malloc_zero(heap,page,size,false);
+}
+extern void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
+  return _mi_page_malloc_zero(heap,page,size,true);
+}
+
 static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
   mi_assert(heap != NULL);
   #if MI_DEBUG
@@ -117,7 +125,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
   #endif

   mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
-  void* const p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero);
+  void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero);
   mi_track_malloc(p,size,zero);

   #if MI_STAT>1
@@ -321,11 +329,11 @@ mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_
 // `strdup` using mi_malloc
 mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept {
   if (s == NULL) return NULL;
-  size_t n = strlen(s);
-  char* t = (char*)mi_heap_malloc(heap,n+1);
+  size_t len = _mi_strlen(s);
+  char* t = (char*)mi_heap_malloc(heap,len+1);
   if (t == NULL) return NULL;
-  _mi_memcpy(t, s, n);
-  t[n] = 0;
+  _mi_memcpy(t, s, len);
+  t[len] = 0;
   return t;
 }

@@ -336,13 +344,11 @@ mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexce
 // `strndup` using mi_malloc
 mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept {
   if (s == NULL) return NULL;
-  const char* end = (const char*)memchr(s, 0, n);  // find end of string in the first `n` characters (returns NULL if not found)
-  const size_t m = (end != NULL ? (size_t)(end - s) : n);  // `m` is the minimum of `n` or the end-of-string
-  mi_assert_internal(m <= n);
-  char* t = (char*)mi_heap_malloc(heap, m+1);
+  const size_t len = _mi_strnlen(s,n);  // len <= n
+  char* t = (char*)mi_heap_malloc(heap, len+1);
   if (t == NULL) return NULL;
-  _mi_memcpy(t, s, m);
-  t[m] = 0;
+  _mi_memcpy(t, s, len);
+  t[len] = 0;
   return t;
 }

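Replacing the `memchr` scan with an internal `_mi_strnlen` likely keeps the override path self-contained rather than depending on libc routines while mimalloc is standing in for `malloc`. A sketch of what a strnlen-style helper does (the real helper lives elsewhere in mimalloc; this is illustrative):

    #include <stddef.h>

    // Length of `s` capped at `n`; never reads past s[n-1].
    static size_t my_strnlen(const char* s, size_t n) {
      size_t len = 0;
      while (len < n && s[len] != 0) { len++; }
      return len;   // always <= n, matching the `len <= n` comment above
    }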
@@ -113,10 +113,10 @@ size_t _mi_bin_size(uint8_t bin) {
 // Good size for allocation
 size_t mi_good_size(size_t size) mi_attr_noexcept {
   if (size <= MI_MEDIUM_OBJ_SIZE_MAX) {
-    return _mi_bin_size(mi_bin(size));
+    return _mi_bin_size(mi_bin(size + MI_PADDING_SIZE));
   }
   else {
-    return _mi_align_up(size,_mi_os_page_size());
+    return _mi_align_up(size + MI_PADDING_SIZE,_mi_os_page_size());
   }
 }

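The change makes `mi_good_size` include the padding that debug/secure builds append to each block, so the size it reports matches the bin the allocator will actually use. A worked example under assumed values (8 bytes of padding and 8-byte-spaced small bins; not the real `mi_bin`):

    #include <stddef.h>

    // Illustrative only: round (size + padding) up to an 8-byte-spaced bin.
    static size_t good_size_with_padding(size_t size, size_t padding) {
      const size_t padded = size + padding;
      return (padded + 7) & ~(size_t)7;
    }
    // good_size_with_padding(40, 8) == 48: the bin that actually backs a
    // 40-byte request once 8 bytes of padding are added.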
@@ -932,12 +932,12 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
   // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
   if mi_unlikely(zero && page->block_size == 0) {
     // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case.
-    void* p = _mi_page_malloc(heap, page, size, false);
+    void* p = _mi_page_malloc(heap, page, size);
     mi_assert_internal(p != NULL);
     _mi_memzero_aligned(p, mi_page_usable_block_size(page));
     return p;
   }
   else {
-    return _mi_page_malloc(heap, page, size, zero);
+    return _mi_page_malloc_zero(heap, page, size, zero);
   }
 }
@@ -68,7 +68,7 @@ int _mi_prim_free(void* addr, size_t size) {
 // Allocation
 //---------------------------------------------

-extern void* emmalloc_memalign(size_t, size_t);
+extern void* emmalloc_memalign(size_t alignment, size_t size);

 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
 int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
@@ -78,17 +78,10 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la
   //       That assumes no one else uses sbrk but us (they could go up,
   //       scribble, and then down), but we could assert on that perhaps.
   *is_zero = false;
-  // emmalloc has some limitations on alignment size.
-  // TODO: Why does mimalloc ask for an align of 4MB? that ends up allocating
-  // 8, which wastes quite a lot for us in wasm. If that is unavoidable,
-  // we may want to improve emmalloc to support such alignment. See also
-  // https://github.com/emscripten-core/emscripten/issues/20645
+  // emmalloc has a minimum alignment size.
   #define MIN_EMMALLOC_ALIGN 8
-  #define MAX_EMMALLOC_ALIGN (1024*1024)
   if (try_alignment < MIN_EMMALLOC_ALIGN) {
     try_alignment = MIN_EMMALLOC_ALIGN;
-  } else if (try_alignment > MAX_EMMALLOC_ALIGN) {
-    try_alignment = MAX_EMMALLOC_ALIGN;
   }
   void* p = emmalloc_memalign(try_alignment, size);
   *addr = p;
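With the upper clamp gone, only the minimum is enforced and larger hints reach emmalloc unmodified; since `try_alignment` is only a hint, an oversized value no longer gets silently degraded to 1 MiB. Restated on its own:

    #include <stddef.h>

    // Only emmalloc's minimum alignment is enforced now.
    static size_t clamp_alignment(size_t try_alignment) {
      const size_t min_align = 8;   // MIN_EMMALLOC_ALIGN above
      return (try_alignment < min_align) ? min_align : try_alignment;
    }
    // clamp_alignment(4)              == 8
    // clamp_alignment((size_t)4<<20)  == 4 MiB (previously clamped to 1 MiB)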
@@ -482,7 +482,7 @@ void _mi_prim_out_stderr( const char* msg )
   // on windows with redirection, the C runtime cannot handle locale dependent output
   // after the main thread closes so we use direct console output.
   if (!_mi_preloading()) {
-    // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console
+    // _cputs(msg); // _cputs cannot be used as it aborts when failing to lock the console
     static HANDLE hcon = INVALID_HANDLE_VALUE;
     static bool hconIsConsole;
     if (hcon == INVALID_HANDLE_VALUE) {
@@ -16,7 +16,9 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "mimalloc/internal.h"
 #include "mimalloc/atomic.h"

-#if (MI_INTPTR_SIZE==8)
+#if (MI_INTPTR_SIZE>=8) && MI_TRACK_ASAN
+#define MI_MAX_ADDRESS    ((size_t)140 << 40)  // 140TB (see issue #881)
+#elif (MI_INTPTR_SIZE >= 8)
 #define MI_MAX_ADDRESS    ((size_t)40 << 40)   // 40TB (to include huge page areas)
 #else
 #define MI_MAX_ADDRESS    ((size_t)2 << 30)    // 2Gb
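The price of a larger `MI_MAX_ADDRESS` is a proportionally larger segment map, which keeps one bit per possible segment-aligned address. A back-of-the-envelope check, assuming the 32 MiB segment size of the v2.x design (an assumption, not read from this diff):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const uint64_t seg_size  = UINT64_C(32) << 20;   // assumed 32 MiB segments
      const uint64_t asan_max  = UINT64_C(140) << 40;  // 140TB under ASAN
      const uint64_t plain_max = UINT64_C(40) << 40;   // 40TB otherwise
      // one bit per segment slot:
      printf("asan map : %llu KiB\n", (unsigned long long)(asan_max / seg_size / 8 / 1024));   // 560
      printf("plain map: %llu KiB\n", (unsigned long long)(plain_max / seg_size / 8 / 1024));  // 160
      return 0;
    }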
@@ -312,20 +312,28 @@ static size_t mi_segment_info_size(mi_segment_t* segment) {
   return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE;
 }

-static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t xblock_size, size_t* page_size)
+static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t block_size, size_t* page_size)
 {
-  ptrdiff_t idx = slice - segment->slices;
-  size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE;
+  const ptrdiff_t idx = slice - segment->slices;
+  const size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE;
+  uint8_t* const pstart = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE);
   // make the start not OS page aligned for smaller blocks to avoid page/cache effects
-  // note: the offset must always be an xblock_size multiple since we assume small allocations
+  // note: the offset must always be a block_size multiple since we assume small allocations
   // are aligned (see `mi_heap_malloc_aligned`).
   size_t start_offset = 0;
-  if (xblock_size >= MI_INTPTR_SIZE) {
-    if (xblock_size <= 64) { start_offset = 3*xblock_size; }
-    else if (xblock_size <= 512) { start_offset = xblock_size; }
+  if (block_size > 0 && block_size <= MI_MAX_ALIGN_GUARANTEE) {
+    // for small objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore)
+    const size_t adjust = block_size - ((uintptr_t)pstart % block_size);
+    if (adjust < block_size && psize >= block_size + adjust) {
+      start_offset += adjust;
+    }
+  }
+  if (block_size >= MI_INTPTR_SIZE) {
+    if (block_size <= 64) { start_offset += 3*block_size; }
+    else if (block_size <= 512) { start_offset += block_size; }
   }
   if (page_size != NULL) { *page_size = psize - start_offset; }
-  return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset);
+  return (pstart + start_offset);
 }

 // Start of the page available memory; can be used on uninitialized pages
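Tracing the rewritten offset logic with concrete numbers helps: take a 64-byte block size and a slice start that is misaligned by 16. The alignment step adds adjust = 64 - 16 = 48; the cache-coloring step then adds 3 * 64 = 192, and since 192 is a multiple of the block size the start stays block-aligned at offset 240. A runnable trace (fixed inputs, with the `psize` bounds check omitted for brevity):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const uintptr_t pstart = 0x20010;   // hypothetical slice start, 16 bytes off 64
      const size_t block_size = 64;
      size_t start_offset = 0;
      const size_t adjust = block_size - (pstart % block_size);   // 48
      if (adjust < block_size) { start_offset += adjust; }        // align to block size
      if (block_size >= sizeof(void*) && block_size <= 64) {
        start_offset += 3 * block_size;                           // avoid page/cache aliasing
      }
      printf("offset=%zu rem=%zu\n", start_offset,
             (size_t)((pstart + start_offset) % block_size));     // offset=240 rem=0
      return 0;
    }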
@@ -230,6 +230,28 @@ int main(void) {
     result = (((uintptr_t)p % 0x100) == 0); // #602
     mi_free(p);
   }
+  CHECK_BODY("mimalloc-aligned13") {
+    bool ok = true;
+    for( size_t size = 1; size <= (MI_SMALL_SIZE_MAX * 2) && ok; size++ ) {
+      for(size_t align = 1; align <= size && ok; align *= 2 ) {
+        void* p[10];
+        for(int i = 0; i < 10 && ok; i++) {
+          p[i] = mi_malloc_aligned(size,align);;
+          ok = (p[i] != NULL && ((uintptr_t)(p[i]) % align) == 0);
+        }
+        for(int i = 0; i < 10 && ok; i++) {
+          mi_free(p[i]);
+        }
+        /*
+        if (ok && align <= size && ((size + MI_PADDING_SIZE) & (align-1)) == 0) {
+          size_t bsize = mi_good_size(size);
+          ok = (align <= bsize && (bsize & (align-1)) == 0);
+        }
+        */
+      }
+    }
+    result = ok;
+  }
   CHECK_BODY("malloc-aligned-at1") {
     void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p);
   };