Merge branch 'dev2'

Daan 2025-03-28 14:20:20 -07:00
commit 13cd3f7f05
29 changed files with 256 additions and 102 deletions

View file

@@ -434,7 +434,7 @@ endif()
 if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku")
   if(MI_OPT_ARCH)
-    if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_OSX_ARCHITECTURES)  # to support multi-arch binaries (#999)
+    if(APPLE AND CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang" AND CMAKE_OSX_ARCHITECTURES)  # to support multi-arch binaries (#999)
       if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
         list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a")
       endif()
@@ -532,7 +532,9 @@ if(MI_TRACK_ASAN)
 endif()
 string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC)
 list(APPEND mi_defines "MI_CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE_LC}")  #todo: multi-config project needs $<CONFIG> ?
-if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$"))
+if(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$")
+  list(APPEND mi_defines MI_BUILD_RELEASE)
+else()
   set(mi_libname "${mi_libname}-${CMAKE_BUILD_TYPE_LC}")  #append build type (e.g. -debug) if not a release version
 endif()
@@ -582,7 +584,7 @@ if(MI_BUILD_SHARED)
   install(TARGETS mimalloc EXPORT mimalloc ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
   install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir})
-  if(WIN32)
+  if(WIN32 AND NOT MINGW)
     # On windows, the import library name for the dll would clash with the static mimalloc.lib library
     # so we postfix the dll import library with `.dll.lib` (and also the .pdb debug file)
     set_property(TARGET mimalloc PROPERTY ARCHIVE_OUTPUT_NAME "${mi_libname}.dll" )
@@ -592,6 +594,9 @@ if(MI_BUILD_SHARED)
     # install(FILES "$<TARGET_FILE_DIR:mimalloc>/${mi_libname}.dll.pdb" DESTINATION ${CMAKE_INSTALL_LIBDIR})
   endif()
   if(WIN32 AND MI_WIN_REDIRECT)
+    if(MINGW)
+      set_property(TARGET mimalloc PROPERTY PREFIX "")
+    endif()
     # On windows, link and copy the mimalloc redirection dll too.
     if(CMAKE_GENERATOR_PLATFORM STREQUAL "arm64ec")
       set(MIMALLOC_REDIRECT_SUFFIX "-arm64ec")

View file

@@ -7,9 +7,9 @@ trigger:
   branches:
     include:
     - master
-    - dev
-    - dev2
     - dev3
+    - dev2
+    - dev
   tags:
     include:
     - v*
@@ -34,6 +34,14 @@ jobs:
         BuildType: secure
        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
        MSBuildConfiguration: Release
+      Debug x86:
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -A Win32
+        MSBuildConfiguration: Debug
+      Release x86:
+        BuildType: release
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -A Win32
+        MSBuildConfiguration: Release
     steps:
     - task: CMake@1
       inputs:

Binary files changed (contents not shown; file mode changed from normal to executable):

bin/mimalloc-redirect-arm64.dll
bin/mimalloc-redirect-arm64.lib
bin/mimalloc-redirect-arm64ec.dll
bin/mimalloc-redirect-arm64ec.lib
bin/mimalloc-redirect.dll
bin/mimalloc-redirect.lib
bin/mimalloc-redirect32.dll
bin/mimalloc-redirect32.lib

View file

@ -1,6 +1,6 @@
set(mi_version_major 2) set(mi_version_major 2)
set(mi_version_minor 2) set(mi_version_minor 2)
set(mi_version_patch 2) set(mi_version_patch 3)
set(mi_version ${mi_version_major}.${mi_version_minor}) set(mi_version ${mi_version_major}.${mi_version_minor})
set(PACKAGE_VERSION ${mi_version}) set(PACKAGE_VERSION ${mi_version})

View file

@@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #ifndef MIMALLOC_H
 #define MIMALLOC_H
 
-#define MI_MALLOC_VERSION 222   // major + 2 digits minor
+#define MI_MALLOC_VERSION 223   // major + 2 digits minor
 
 // ------------------------------------------------------
 // Compiler specific attributes

View file

@@ -266,6 +266,13 @@ static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int6
   return current;
 #endif
 }
+
+static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, const volatile int64_t* padd) {
+  const int64_t add = *padd;
+  if (add != 0) {
+    mi_atomic_addi64_relaxed((volatile _Atomic(int64_t)*)p, add);
+  }
+}
 static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) {
   int64_t current;
   do {
@@ -363,8 +370,9 @@ static inline void mi_atomic_yield(void) {
   _mm_pause();
 }
 #elif (defined(__GNUC__) || defined(__clang__)) && \
-      (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \
-       defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__)
+      (defined(__x86_64__) || defined(__i386__) || \
+       defined(__aarch64__) || defined(__arm__) || \
+       defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__))
 #if defined(__x86_64__) || defined(__i386__)
 static inline void mi_atomic_yield(void) {
   __asm__ volatile ("pause" ::: "memory");
@@ -373,10 +381,16 @@ static inline void mi_atomic_yield(void) {
 static inline void mi_atomic_yield(void) {
   __asm__ volatile("wfe");
 }
-#elif (defined(__arm__) && __ARM_ARCH__ >= 7)
+#elif defined(__arm__)
+#if __ARM_ARCH >= 7
 static inline void mi_atomic_yield(void) {
   __asm__ volatile("yield" ::: "memory");
 }
+#else
+static inline void mi_atomic_yield(void) {
+  __asm__ volatile ("nop" ::: "memory");
+}
+#endif
 #elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__)
 #ifdef __APPLE__
 static inline void mi_atomic_yield(void) {
@@ -387,10 +401,6 @@ static inline void mi_atomic_yield(void) {
   __asm__ __volatile__ ("or 27,27,27" ::: "memory");
 }
 #endif
-#elif defined(__armel__) || defined(__ARMEL__)
-static inline void mi_atomic_yield(void) {
-  __asm__ volatile ("nop" ::: "memory");
-}
 #endif
 #elif defined(__sun)
 // Fallback for other archs
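Note: the arm32 change above gives pre-v7 ARM a plain `nop` fallback instead of relying on `yield` (and fixes the `__ARM_ARCH__` vs `__ARM_ARCH` spelling). For context, this hint is what spin-wait loops use while polling; a minimal standalone sketch of that pattern (the `atomic_yield` and `spin_acquire` names here are illustrative, not mimalloc's API):

#include <stdatomic.h>

// Illustrative spin-lock acquire using a CPU relax hint, mirroring the
// per-architecture dispatch in the diff above (a sketch, not mimalloc code).
static inline void atomic_yield(void) {
  #if defined(__x86_64__) || defined(__i386__)
  __asm__ volatile ("pause" ::: "memory");   // x86: PAUSE
  #elif defined(__arm__) && (__ARM_ARCH >= 7)
  __asm__ volatile ("yield" ::: "memory");   // ARMv7+: YIELD
  #else
  __asm__ volatile ("nop" ::: "memory");     // pre-v7 ARM etc.: plain NOP (the new fallback)
  #endif
}

static void spin_acquire(atomic_flag* lock) {
  while (atomic_flag_test_and_set_explicit(lock, memory_order_acquire)) {
    atomic_yield();  // relax the core while another thread holds the lock
  }
}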

View file

@@ -127,6 +127,7 @@ bool _mi_os_has_virtual_reserve(void);
 bool _mi_os_reset(void* addr, size_t size);
 bool _mi_os_commit(void* p, size_t size, bool* is_zero);
+bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size);
 bool _mi_os_decommit(void* addr, size_t size);
 bool _mi_os_protect(void* addr, size_t size);
 bool _mi_os_unprotect(void* addr, size_t size);
@@ -1030,6 +1031,21 @@ static inline size_t mi_bsr(size_t x) {
   return (x==0 ? MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x));
 }
 
+size_t _mi_popcount_generic(size_t x);
+
+static inline size_t mi_popcount(size_t x) {
+  if (x<=1) return x;
+  if (x==SIZE_MAX) return MI_SIZE_BITS;
+  #if defined(__GNUC__)
+  #if (SIZE_MAX == ULONG_MAX)
+  return __builtin_popcountl(x);
+  #else
+  return __builtin_popcountll(x);
+  #endif
+  #else
+  return _mi_popcount_generic(x);
+  #endif
+}
 
 // ---------------------------------------------------------------------------------
 // Provide our own `_mi_memcpy` for potential performance optimizations.
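The new `mi_popcount` takes the compiler builtin when available and otherwise falls back to `_mi_popcount_generic` (defined in the libc.c section further below). A standalone restatement of its fast paths, with a portable loop standing in for the builtin (`popcount_checked` is a hypothetical name, not part of the headers):

#include <assert.h>
#include <limits.h>
#include <stdint.h>

// Hypothetical stand-in mirroring mi_popcount's fast paths.
static size_t popcount_checked(size_t x) {
  if (x <= 1) return x;                                // popcount(0)==0, popcount(1)==1
  if (x == SIZE_MAX) return sizeof(size_t)*CHAR_BIT;   // all bits set
  size_t n = 0;
  while (x != 0) { x &= (x - 1); n++; }                // Kernighan: clear lowest set bit
  return n;
}

int main(void) {
  assert(popcount_checked(0) == 0);
  assert(popcount_checked(0xF0u) == 4);
  assert(popcount_checked(SIZE_MAX) == sizeof(size_t)*CHAR_BIT);
  return 0;
}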

View file

@@ -67,10 +67,10 @@ terms of the MIT license. A copy of the license can be found in the file
 // #define MI_DEBUG 2  // + internal assertion checks
 // #define MI_DEBUG 3  // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON)
 #if !defined(MI_DEBUG)
-#if !defined(NDEBUG) || defined(_DEBUG)
-#define MI_DEBUG 2
-#else
+#if defined(MI_BUILD_RELEASE) || defined(NDEBUG)
 #define MI_DEBUG 0
+#else
+#define MI_DEBUG 2
 #endif
 #endif
@@ -683,22 +683,25 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line
 // add to stat keeping track of the peak
 void _mi_stat_increase(mi_stat_count_t* stat, size_t amount);
 void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount);
+void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount);
 // counters can just be increased
 void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
 
 #if (MI_STAT)
 #define mi_stat_increase(stat,amount)         _mi_stat_increase( &(stat), amount)
 #define mi_stat_decrease(stat,amount)         _mi_stat_decrease( &(stat), amount)
+#define mi_stat_adjust_decrease(stat,amount)  _mi_stat_adjust_decrease( &(stat), amount)
 #define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount)
 #else
 #define mi_stat_increase(stat,amount)         ((void)0)
 #define mi_stat_decrease(stat,amount)         ((void)0)
+#define mi_stat_adjust_decrease(stat,amount)  ((void)0)
 #define mi_stat_counter_increase(stat,amount) ((void)0)
 #endif
 
 #define mi_heap_stat_counter_increase(heap,stat,amount)  mi_stat_counter_increase( (heap)->tld->stats.stat, amount)
 #define mi_heap_stat_increase(heap,stat,amount)          mi_stat_increase( (heap)->tld->stats.stat, amount)
 #define mi_heap_stat_decrease(heap,stat,amount)          mi_stat_decrease( (heap)->tld->stats.stat, amount)
+#define mi_heap_stat_adjust_decrease(heap,stat,amount)   mi_stat_adjust_decrease( (heap)->tld->stats.stat, amount)
 
 #endif
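Together with the CMake change above, release-flavored builds now pass `-DMI_BUILD_RELEASE`, so `MI_DEBUG` defaults to 0 even when `NDEBUG` is absent (previously the default keyed off `NDEBUG`/`_DEBUG` alone). A hypothetical compile-time check of the new rule (not part of the repo):

// Hypothetical sanity check of the MI_DEBUG defaulting above.
#if !defined(MI_DEBUG)
  #if defined(MI_BUILD_RELEASE) || defined(NDEBUG)
    #define MI_DEBUG 0   // release flavors: internal assertions off
  #else
    #define MI_DEBUG 2   // otherwise: internal assertion checks on
  #endif
#endif

#if defined(MI_BUILD_RELEASE) && (MI_DEBUG != 0)
  #error "a release build should not enable internal assertions"
#endif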

View file

@@ -12,9 +12,9 @@ is a general purpose allocator with excellent [performance](#performance) charac
 Initially developed by Daan Leijen for the runtime systems of the
 [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
 
-Latest release : `v3.0.2` (beta) (2025-03-06).
-Latest v2 release: `v2.2.2` (2025-03-06).
-Latest v1 release: `v1.9.2` (2024-03-06).
+Latest release : `v3.0.3` (beta) (2025-03-28).
+Latest v2 release: `v2.2.3` (2025-03-28).
+Latest v1 release: `v1.9.3` (2024-03-28).
 
 mimalloc is a drop-in replacement for `malloc` and can be used in other programs
 without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
@@ -84,6 +84,9 @@ Enjoy!
 
 ### Releases
 
+* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3-beta`: Various small bug and build fixes, including:
+  fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts,
+  fix execution on non BMI1 x64 systems.
 * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes.
   Add `mi_options_print`, `mi_arenas_print`, and the experimental `mi_stat_get` and `mi_stat_get_json`.
   Add `mi_thread_set_in_threadpool` and `mi_heap_set_numa_affinity` (v3 only). Add vcpkg portfile.

View file

@@ -191,9 +191,6 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
   const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0;
   if mi_likely(is_aligned)
   {
-    #if MI_STAT>1
-    mi_heap_stat_increase(heap, malloc_requested, size);
-    #endif
     void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize));  // call specific page malloc for better codegen
     mi_assert_internal(p != NULL);
     mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);

View file

@@ -30,6 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // Note: in release mode the (inlined) routine is about 7 instructions with a single test.
 extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
 {
+  mi_assert_internal(size >= MI_PADDING_SIZE);
   mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size);
 
   // check the free list
@@ -88,6 +89,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_
     #if (MI_STAT>1)
     const size_t bin = _mi_bin(bsize);
     mi_heap_stat_increase(heap, malloc_bins[bin], 1);
+    mi_heap_stat_increase(heap, malloc_requested, size - MI_PADDING_SIZE);
     #endif
   }
   #endif
@@ -146,12 +148,6 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
   void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero);
   mi_track_malloc(p,size,zero);
-  #if MI_STAT>1
-  if (p != NULL) {
-    if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); }
-    mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p));
-  }
-  #endif
   #if MI_DEBUG>3
   if (p != NULL && zero) {
     mi_assert_expensive(mi_mem_is_zero(p, size));
@@ -188,12 +184,6 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z
   void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment);  // note: size can overflow but it is detected in malloc_generic
   mi_track_malloc(p,size,zero);
-  #if MI_STAT>1
-  if (p != NULL) {
-    if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); }
-    mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p));
-  }
-  #endif
   #if MI_DEBUG>3
   if (p != NULL && zero) {
     mi_assert_expensive(mi_mem_is_zero(p, size));
@@ -666,7 +656,8 @@ mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, boo
   if (p != NULL) {
     if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); }
     #if MI_STAT>1
-    mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p));
+    mi_heap_stat_adjust_decrease(heap, malloc_requested, req_size);
+    mi_heap_stat_increase(heap, malloc_requested, size);
     #endif
     _mi_stat_counter_increase(&heap->tld->stats.malloc_guarded_count, 1);
   }

View file

@@ -259,7 +259,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
 
   // set the dirty bits (todo: no need for an atomic op here?)
   if (arena->memid.initially_zero && arena->blocks_dirty != NULL) {
-    memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
+    memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL, NULL);
   }
 
   // set commit state
@@ -271,10 +271,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
     // commit requested, but the range may not be committed as a whole: ensure it is committed now
     memid->initially_committed = true;
     bool any_uncommitted;
-    _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
+    size_t already_committed = 0;
+    _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted, &already_committed);
     if (any_uncommitted) {
+      mi_assert_internal(already_committed < needed_bcount);
+      const size_t commit_size = mi_arena_block_size(needed_bcount);
+      const size_t stat_commit_size = commit_size - mi_arena_block_size(already_committed);
       bool commit_zero = false;
-      if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero)) {
+      if (!_mi_os_commit_ex(p, commit_size, &commit_zero, stat_commit_size)) {
         memid->initially_committed = false;
       }
       else {
@@ -284,7 +288,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
   }
   else {
     // no need to commit, but check if already fully committed
-    memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
+    size_t already_committed = 0;
+    memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &already_committed);
+    if (!memid->initially_committed && already_committed > 0) {
+      // partially committed: as it will be committed at some time, adjust the stats and pretend the range is fully uncommitted.
+      mi_assert_internal(already_committed < needed_bcount);
+      _mi_stat_decrease(&_mi_stats_main.committed, mi_arena_block_size(already_committed));
+      _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
+    }
   }
 
   return p;
@@ -468,17 +479,19 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks)
   const size_t size = mi_arena_block_size(blocks);
   void* const p = mi_arena_block_start(arena, bitmap_idx);
   bool needs_recommit;
-  if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) {
+  size_t already_committed = 0;
+  if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx, &already_committed)) {
     // all blocks are committed, we can purge freely
+    mi_assert_internal(already_committed == blocks);
     needs_recommit = _mi_os_purge(p, size);
   }
   else {
     // some blocks are not committed -- this can happen when a partially committed block is freed
     // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge
-    // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
-    // and also undo the decommit stats (as it was already adjusted)
+    // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory).
+    mi_assert_internal(already_committed < blocks);
     mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
-    needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, 0);
+    needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, mi_arena_block_size(already_committed));
   }
 
   // clear the purged blocks
@@ -512,7 +525,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t
     else {
       // already an expiration was set
     }
-    _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL);
+    _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL, NULL);
   }
 }
@@ -652,15 +665,16 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
   if (p==NULL) return;
   if (size==0) return;
   const bool all_committed = (committed_size == size);
+  const size_t decommitted_size = (committed_size <= size ? size - committed_size : 0);
 
   // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.)
   mi_track_mem_undefined(p,size);
 
   if (mi_memkind_is_os(memid.memkind)) {
     // was a direct OS allocation, pass through
-    if (!all_committed && committed_size > 0) {
-      // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size)
-      _mi_stat_decrease(&_mi_stats_main.committed, committed_size);
+    if (!all_committed && decommitted_size > 0) {
+      // if partially committed, adjust the committed stats (as `_mi_os_free` will decrease commit by the full size)
+      _mi_stat_increase(&_mi_stats_main.committed, decommitted_size);
     }
     _mi_os_free(p, size, memid);
   }
@@ -694,14 +708,14 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
     mi_assert_internal(arena->blocks_purge != NULL);
 
     if (!all_committed) {
-      // mark the entire range as no longer committed (so we recommit the full range when re-using)
+      // mark the entire range as no longer committed (so we will recommit the full range when re-using)
       _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
       mi_track_mem_noaccess(p,size);
-      if (committed_size > 0) {
+      //if (committed_size > 0) {
        // if partially committed, adjust the committed stats (is it will be recommitted when re-using)
-        // in the delayed purge, we now need to not count a decommit if the range is not marked as committed.
+        // in the delayed purge, we do no longer decrease the commit if the range is not marked entirely as committed.
         _mi_stat_decrease(&_mi_stats_main.committed, committed_size);
-      }
+      //}
       // note: if not all committed, it may be that the purge will reset/decommit the entire range
       // that contains already decommitted parts. Since purge consistently uses reset or decommit that
      // works (as we should never reset decommitted parts).
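The running idea in these hunks is that the `committed` statistic should only move by the number of blocks whose commit state actually changes, which the bitmap now reports through the new `already_set`/`already_committed` counts. A worked example of the arithmetic (the 64 KiB block size here is only an assumption for illustration):

#include <assert.h>
#include <stddef.h>

int main(void) {
  const size_t block_size        = 64 * 1024;  // assumed arena block size
  const size_t needed_bcount     = 4;          // blocks being claimed
  const size_t already_committed = 1;          // blocks found already committed
  // _mi_os_commit_ex re-commits the whole claimed range...
  const size_t commit_size = needed_bcount * block_size;                         // 256 KiB
  // ...but only the newly committed portion is charged to the statistic:
  const size_t stat_commit_size = commit_size - already_committed * block_size;  // 192 KiB
  assert(commit_size == 262144 && stat_commit_size == 196608);
  return 0;
}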

View file

@@ -369,7 +369,7 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
 
 // Set `count` bits at `bitmap_idx` to 1 atomically
 // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
+bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set) {
   size_t idx = mi_bitmap_index_field(bitmap_idx);
   size_t pre_mask;
   size_t mid_mask;
@@ -377,28 +377,31 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co
   size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
   bool all_zero = true;
   bool any_zero = false;
+  size_t one_count = 0;
   _Atomic(size_t)*field = &bitmap[idx];
   size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
-  if ((prev & pre_mask) != 0) all_zero = false;
+  if ((prev & pre_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & pre_mask); }
   if ((prev & pre_mask) != pre_mask) any_zero = true;
   while (mid_count-- > 0) {
     prev = mi_atomic_or_acq_rel(field++, mid_mask);
-    if ((prev & mid_mask) != 0) all_zero = false;
+    if ((prev & mid_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & mid_mask); }
     if ((prev & mid_mask) != mid_mask) any_zero = true;
   }
   if (post_mask!=0) {
     prev = mi_atomic_or_acq_rel(field, post_mask);
-    if ((prev & post_mask) != 0) all_zero = false;
+    if ((prev & post_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & post_mask); }
     if ((prev & post_mask) != post_mask) any_zero = true;
   }
   if (pany_zero != NULL) { *pany_zero = any_zero; }
+  if (already_set != NULL) { *already_set = one_count; };
+  mi_assert_internal(all_zero ? one_count == 0 : one_count <= count);
   return all_zero;
 }
 
 // Returns `true` if all `count` bits were 1.
 // `any_ones` is `true` if there was at least one bit set to one.
-static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
+static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones, size_t* already_set) {
   size_t idx = mi_bitmap_index_field(bitmap_idx);
   size_t pre_mask;
   size_t mid_mask;
@@ -406,30 +409,33 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field
   size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
   bool all_ones = true;
   bool any_ones = false;
+  size_t one_count = 0;
   mi_bitmap_field_t* field = &bitmap[idx];
   size_t prev = mi_atomic_load_relaxed(field++);
   if ((prev & pre_mask) != pre_mask) all_ones = false;
-  if ((prev & pre_mask) != 0) any_ones = true;
+  if ((prev & pre_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & pre_mask); }
   while (mid_count-- > 0) {
     prev = mi_atomic_load_relaxed(field++);
     if ((prev & mid_mask) != mid_mask) all_ones = false;
-    if ((prev & mid_mask) != 0) any_ones = true;
+    if ((prev & mid_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & mid_mask); }
   }
   if (post_mask!=0) {
     prev = mi_atomic_load_relaxed(field);
     if ((prev & post_mask) != post_mask) all_ones = false;
-    if ((prev & post_mask) != 0) any_ones = true;
+    if ((prev & post_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & post_mask); }
   }
   if (pany_ones != NULL) { *pany_ones = any_ones; }
+  if (already_set != NULL) { *already_set = one_count; }
+  mi_assert_internal(all_ones ? one_count == count : one_count < count);
   return all_ones;
 }
 
-bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
-  return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL);
+bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set) {
+  return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL, already_set);
 }
 
 bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
   bool any_ones;
-  mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
+  mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones, NULL);
   return any_ones;
 }
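Counting the already-set bits costs one `mi_popcount` per touched field: the claimed range is split into a pre-mask, zero or more full mid-fields, and a post-mask, and `popcount(prev & mask)` accumulates the ones that were set before the claim. A small standalone check of that counting idea (a sketch with plain 32-bit values instead of atomic bitmap fields):

#include <assert.h>
#include <stdint.h>

// Portable popcount stand-in (mimalloc uses mi_popcount).
static unsigned popcount32(uint32_t x) {
  unsigned n = 0;
  while (x != 0) { x &= x - 1; n++; }
  return n;
}

int main(void) {
  const uint32_t prev = 0xF0u;  // field value before the atomic-or: bits 4..7 set
  const uint32_t mask = 0x3Cu;  // bits 2..5 being claimed
  // bits 4 and 5 were already set inside the claimed range:
  assert(popcount32(prev & mask) == 2);
  return 0;
}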

View file

@@ -111,9 +111,9 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
 
 // Set `count` bits at `bitmap_idx` to 1 atomically
 // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);
+bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set);
 
-bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
+bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set);
 bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
 
 #endif

View file

@@ -522,15 +522,13 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
 // only maintain stats for smaller objects if requested
 #if (MI_STAT>0)
 static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
-  #if (MI_STAT < 2)
   MI_UNUSED(block);
-  #endif
   mi_heap_t* const heap = mi_heap_get_default();
   const size_t bsize = mi_page_usable_block_size(page);
-  #if (MI_STAT>1)
-  const size_t usize = mi_page_usable_size_of(page, block);
-  mi_heap_stat_decrease(heap, malloc_requested, usize);
-  #endif
+  // #if (MI_STAT>1)
+  // const size_t usize = mi_page_usable_size_of(page, block);
+  // mi_heap_stat_decrease(heap, malloc_requested, usize);
+  // #endif
   if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) {
     mi_heap_stat_decrease(heap, malloc_normal, bsize);
     #if (MI_STAT > 1)

View file

@@ -344,17 +344,17 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
       mi_heap_stat_decrease(heap, malloc_huge, bsize);
     }
   }
-  #if (MI_STAT)
+  #if (MI_STAT>0)
   _mi_page_free_collect(page, false);  // update used count
   const size_t inuse = page->used;
   if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
     mi_heap_stat_decrease(heap, malloc_normal, bsize * inuse);
     #if (MI_STAT>1)
     mi_heap_stat_decrease(heap, malloc_bins[_mi_bin(bsize)], inuse);
     #endif
   }
-  mi_heap_stat_decrease(heap, malloc_requested, bsize * inuse);  // todo: off for aligned blocks...
+  // mi_heap_stat_decrease(heap, malloc_requested, bsize * inuse);  // todo: off for aligned blocks...
   #endif
 
   /// pretend it is all free now
   mi_assert_internal(mi_page_thread_free(page) == NULL);

View file

@@ -108,7 +108,7 @@ const mi_page_t _mi_page_empty = {
 // may lead to allocation itself on some platforms)
 // --------------------------------------------------------
 
-mi_decl_hidden mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
+mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
   NULL,
   MI_ATOMIC_VAR_INIT(NULL),
   0,                // tid

View file

@@ -275,3 +275,60 @@ int _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
   va_end(args);
   return written;
 }
+
+#if MI_SIZE_SIZE == 4
+
+#define mi_mask_even_bits32      (0x55555555)
+#define mi_mask_even_pairs32     (0x33333333)
+#define mi_mask_even_nibbles32   (0x0F0F0F0F)
+
+// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
+static size_t mi_byte_sum32(uint32_t x) {
+  // perform `x * 0x01010101`: the highest byte contains the sum of all bytes.
+  x += (x << 8);
+  x += (x << 16);
+  return (size_t)(x >> 24);
+}
+
+static size_t mi_popcount_generic32(uint32_t x) {
+  // first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10
+  // in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair
+  // into the lower bit-pair:
+  x = x - ((x >> 1) & mi_mask_even_bits32);
+  // add the 2-bit pair results
+  x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32);
+  // add the 4-bit nibble results
+  x = (x + (x >> 4)) & mi_mask_even_nibbles32;
+  // each byte now has a count of its bits, we can sum them now:
+  return mi_byte_sum32(x);
+}
+
+mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
+  return mi_popcount_generic32(x);
+}
+
+#else
+
+#define mi_mask_even_bits64      (0x5555555555555555)
+#define mi_mask_even_pairs64     (0x3333333333333333)
+#define mi_mask_even_nibbles64   (0x0F0F0F0F0F0F0F0F)
+
+// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
+static size_t mi_byte_sum64(uint64_t x) {
+  x += (x << 8);
+  x += (x << 16);
+  x += (x << 32);
+  return (size_t)(x >> 56);
+}
+
+static size_t mi_popcount_generic64(uint64_t x) {
+  x = x - ((x >> 1) & mi_mask_even_bits64);
+  x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64);
+  x = (x + (x >> 4)) & mi_mask_even_nibbles64;
+  return mi_byte_sum64(x);
+}
+
+mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
+  return mi_popcount_generic64(x);
+}
+
+#endif
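The generic popcount is the classic SWAR reduction: bit pairs, then nibbles, then a byte sum done with shifted adds instead of the usual `* 0x01010101` multiply (useful on targets without a fast multiplier). Tracing the 32-bit steps on x = 0xFF makes the invariants concrete:

#include <assert.h>
#include <stdint.h>

int main(void) {
  uint32_t x = 0xFFu;                                // 8 set bits
  x = x - ((x >> 1) & 0x55555555u);                  // 0xAA: each 2-bit pair now holds its count (2)
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 0x44: each nibble holds 4
  x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // 0x08: each byte holds its bit count
  x += (x << 8);                                     // byte sum via shifted adds...
  x += (x << 16);                                    // ...equivalent to x * 0x01010101
  assert((x >> 24) == 8);                            // top byte holds the total popcount
  return 0;
}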

View file

@@ -31,11 +31,12 @@ terms of the MIT license. A copy of the license can be found in the file
 #if defined(__linux__)
   #include <features.h>
+  #include <linux/prctl.h>   // PR_SET_VMA
   //#if defined(MI_NO_THP)
   #include <sys/prctl.h>     // THP disable
   //#endif
   #if defined(__GLIBC__)
     #include <linux/mman.h>  // linux mmap flags
   #else
     #include <sys/mman.h>
   #endif
@@ -205,14 +206,24 @@ static int unix_madvise(void* addr, size_t size, int advice) {
   return (res==0 ? 0 : errno);
 }
 
-static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
+static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) {
+  void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */);
+  #if (defined(__linux__) && defined(PR_SET_VMA))
+  if (p!=MAP_FAILED && p!=NULL) {
+    prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc");
+  }
+  #endif
+  return p;
+}
+
+static void* unix_mmap_prim_aligned(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
   MI_UNUSED(try_alignment);
   void* p = NULL;
   #if defined(MAP_ALIGNED)  // BSD
   if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) {
     size_t n = mi_bsr(try_alignment);
     if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) {  // alignment is a power of 2 and 4096 <= alignment <= 1GiB
-      p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0);
+      p = unix_mmap_prim(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd);
       if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) {
         int err = errno;
         _mi_trace_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr);
@@ -223,7 +234,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p
   }
   #elif defined(MAP_ALIGN)  // Solaris
   if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) {
-    p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0);  // addr parameter is the required alignment
+    p = unix_mmap_prim((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd);  // addr parameter is the required alignment
     if (p!=MAP_FAILED) return p;
     // fall back to regular mmap
   }
@@ -233,7 +244,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p
   if (addr == NULL) {
     void* hint = _mi_os_get_aligned_hint(try_alignment, size);
     if (hint != NULL) {
-      p = mmap(hint, size, protect_flags, flags, fd, 0);
+      p = unix_mmap_prim(hint, size, protect_flags, flags, fd);
       if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) {
         #if MI_TRACK_ENABLED  // asan sometimes does not instrument errno correctly?
         int err = 0;
@@ -248,7 +259,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p
   }
   #endif
   // regular mmap
-  p = mmap(addr, size, protect_flags, flags, fd, 0);
+  p = unix_mmap_prim(addr, size, protect_flags, flags, fd);
   if (p!=MAP_FAILED) return p;
   // failed to allocate
   return NULL;
@@ -319,7 +330,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
   if (large_only || lflags != flags) {
     // try large OS page allocation
     *is_large = true;
-    p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd);
+    p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd);
     #ifdef MAP_HUGE_1GB
     if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) {
       mi_huge_pages_available = false;  // don't try huge 1GiB pages again
@@ -327,7 +338,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
       _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno);
       lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB);
-      p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd);
+      p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd);
     }
     #endif
     if (large_only) return p;
@@ -340,7 +351,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
   // regular allocation
   if (p == NULL) {
     *is_large = false;
-    p = unix_mmap_prim(addr, size, try_alignment, protect_flags, flags, fd);
+    p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, flags, fd);
     if (p != NULL) {
       #if defined(MADV_HUGEPAGE)
       // Many Linux systems don't allow MAP_HUGETLB but they support instead
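The refactor funnels every `mmap` call through the new `unix_mmap_prim`, which additionally names the anonymous mapping via `PR_SET_VMA_ANON_NAME` (Linux 5.17+ with CONFIG_ANON_VMA_NAME) so it shows up as `[anon:mimalloc]` in /proc/<pid>/maps; the call is best-effort and simply fails on older kernels. A minimal standalone demo of the same call:

// Sketch: name an anonymous mapping on Linux (harmless if unsupported).
#define _GNU_SOURCE
#include <stddef.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <linux/prctl.h>   // PR_SET_VMA, PR_SET_VMA_ANON_NAME

int main(void) {
  const size_t size = 1 << 20;
  void* p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) return 1;
  #if defined(PR_SET_VMA)
  // The region then appears as `[anon:demo]` in /proc/self/maps.
  (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "demo");
  #endif
  munmap(p, size);
  return 0;
}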

View file

@@ -143,13 +143,17 @@ void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) {
 
 uintptr_t _mi_random_next(mi_random_ctx_t* ctx) {
   mi_assert_internal(mi_random_is_initialized(ctx));
-  #if MI_INTPTR_SIZE <= 4
-  return chacha_next32(ctx);
-  #elif MI_INTPTR_SIZE == 8
-  return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx));
-  #else
-  # error "define mi_random_next for this platform"
-  #endif
+  uintptr_t r;
+  do {
+    #if MI_INTPTR_SIZE <= 4
+    r = chacha_next32(ctx);
+    #elif MI_INTPTR_SIZE == 8
+    r = (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx));
+    #else
+    # error "define mi_random_next for this platform"
+    #endif
+  } while (r==0);
+  return r;
 }
@@ -163,7 +167,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed) {
   x ^= _mi_prim_clock_now();
   // and do a few randomization steps
   uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
-  for (uintptr_t i = 0; i < max; i++) {
+  for (uintptr_t i = 0; i < max || x==0; i++, x++) {
     x = _mi_random_shuffle(x);
   }
   mi_assert_internal(x != 0);
@@ -179,7 +183,7 @@ static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) {
   if (!use_weak) { _mi_warning_message("unable to use secure randomness\n"); }
   #endif
   uintptr_t x = _mi_os_random_weak(0);
-  for (size_t i = 0; i < 8; i++) {  // key is eight 32-bit words.
+  for (size_t i = 0; i < 8; i++, x++) {  // key is eight 32-bit words.
     x = _mi_random_shuffle(x);
     ((uint32_t*)key)[i] = (uint32_t)x;
   }
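All three hunks guard against a zero value entering the random state (a zero value or key degenerates the shuffle-based mixing), using plain rejection sampling: draw again until nonzero. The same pattern in isolation (the splitmix64-style generator below is only a stand-in, not the ChaCha stream used above):

#include <stdint.h>

// Stand-in raw generator for illustration (NOT mimalloc's chacha_next32).
static uint64_t next_raw(uint64_t* state) {
  uint64_t x = (*state += 0x9E3779B97F4A7C15ull);
  x = (x ^ (x >> 30)) * 0xBF58476D1CE4E5B9ull;
  x = (x ^ (x >> 27)) * 0x94D049BB133111EBull;
  return x ^ (x >> 31);
}

// Rejection sampling: retry until nonzero, mirroring the do/while above.
static uint64_t next_nonzero(uint64_t* state) {
  uint64_t r;
  do { r = next_raw(state); } while (r == 0);
  return r;
}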

View file

@@ -30,6 +30,7 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
   {
     // add atomically (for abandoned pages)
     int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
+    // if (stat == &_mi_stats_main.committed) { mi_assert_internal(current + amount >= 0); };
     mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
     if (amount > 0) {
       mi_atomic_addi64_relaxed(&stat->total,amount);
@@ -61,6 +62,25 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
 }
 
+static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount) {
+  if (amount == 0) return;
+  if mi_unlikely(mi_is_in_main(stat))
+  {
+    // adjust atomically
+    mi_atomic_addi64_relaxed(&stat->current, amount);
+    mi_atomic_addi64_relaxed(&stat->total,amount);
+  }
+  else {
+    // adjust local
+    stat->current += amount;
+    stat->total += amount;
+  }
+}
+
+void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount) {
+  mi_stat_adjust(stat, -((int64_t)amount));
+}
+
 // must be thread safe as it is called from stats_merge
 static void mi_stat_count_add_mt(mi_stat_count_t* stat, const mi_stat_count_t* src) {
@@ -198,6 +218,13 @@ static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int
   _mi_fprintf(out, arg, "\n");
 }
 
+static void mi_stat_total_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) {
+  _mi_fprintf(out, arg, "%10s:", msg);
+  _mi_fprintf(out, arg, "%12s", " ");  // no peak
+  mi_print_amount(stat->total, unit, out, arg);
+  _mi_fprintf(out, arg, "\n");
+}
+
 static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) {
   _mi_fprintf(out, arg, "%10s:", msg);
   mi_print_amount(stat->total, -1, out, arg);
@@ -214,7 +241,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char*
 
 static void mi_print_header(mi_output_fun* out, void* arg ) {
-  _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "current ", "unit ", "total# ");
+  _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "current ", "block ", "total# ");
 }
 
 #if MI_STAT>1
@@ -283,10 +310,10 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
   // and print using that
   mi_print_header(out,arg);
   #if MI_STAT>1
-  mi_stats_print_bins(stats->malloc_bins, MI_BIN_HUGE, "normal",out,arg);
+  mi_stats_print_bins(stats->malloc_bins, MI_BIN_HUGE, "bin",out,arg);
   #endif
   #if MI_STAT
-  mi_stat_print(&stats->malloc_normal, "normal", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg);
+  mi_stat_print(&stats->malloc_normal, "binned", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg);
   // mi_stat_print(&stats->malloc_large, "large", (stats->malloc_large_count.total == 0 ? 1 : -1), out, arg);
   mi_stat_print(&stats->malloc_huge, "huge", (stats->malloc_huge_count.total == 0 ? 1 : -1), out, arg);
   mi_stat_count_t total = { 0,0,0 };
@@ -296,7 +323,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
   mi_stat_print_ex(&total, "total", 1, out, arg, "");
   #endif
   #if MI_STAT>1
-  mi_stat_print_ex(&stats->malloc_requested, "malloc req", 1, out, arg, "");
+  mi_stat_total_print(&stats->malloc_requested, "malloc req", 1, out, arg);
   _mi_fprintf(out, arg, "\n");
   #endif
   mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, "");
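The new `_mi_stat_adjust_decrease` differs from `_mi_stat_decrease` in that it rewinds `total` as well as `current` and never touches `peak`; in `_mi_heap_malloc_guarded` above it backs out the `req_size` counted on the underlying allocation path before the true size is recorded. The intended arithmetic in a single-threaded sketch (mimalloc's version uses atomics for the main stats):

#include <assert.h>
#include <stdint.h>

typedef struct { int64_t total, peak, current; } stat_count_t;  // shape of mi_stat_count_t

static void stat_increase(stat_count_t* s, int64_t a) {
  s->current += a;
  if (s->current > s->peak) s->peak = s->current;  // peak tracks current
  s->total += a;
}

// The new adjust: rewind current AND total, but never the peak.
static void stat_adjust_decrease(stat_count_t* s, int64_t a) {
  s->current -= a;
  s->total   -= a;
}

int main(void) {
  stat_count_t s = {0,0,0};
  stat_increase(&s, 100);         // over-counted request size
  stat_adjust_decrease(&s, 100);  // cancel it...
  stat_increase(&s, 64);          // ...and record the real size
  assert(s.current == 64 && s.total == 64 && s.peak == 100);
  return 0;
}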

View file

@@ -203,7 +203,11 @@ int main(void) {
   CHECK_BODY("malloc-aligned9") {  // test large alignments
     bool ok = true;
     void* p[8];
-    size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, 0 };
+    size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1,
+                        #if SIZE_MAX > UINT32_MAX
+                        2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX,
+                        #endif
+                        0 };
     for (int i = 0; i < 28 && ok; i++) {
       int align = (1 << i);
       for (int j = 0; j < 8 && ok; j++) {