merge from dev

This commit is contained in:
Daan 2024-05-11 09:40:54 -07:00
commit 8b8e689b91
41 changed files with 2107 additions and 1326 deletions

.gitattributes (vendored): 1 line changed

@ -10,3 +10,4 @@
*.dll binary
*.lib binary
*.exe binary
bin export-ignore


@ -19,6 +19,7 @@ option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS"
option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON)
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
option(MI_LIBC_MUSL "Set this when linking with musl libc" OFF)
option(MI_BUILD_SHARED "Build shared library" ON)
option(MI_BUILD_STATIC "Build static library" ON)
option(MI_BUILD_OBJECT "Build object library" ON)
@ -28,6 +29,7 @@ option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clan
option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF)
option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc." OFF)
option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version" OFF)
option(MI_NO_THP "Disable transparent huge pages support on Linux/Android for the mimalloc process only" OFF)
# deprecated options
option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF)
@ -56,6 +58,9 @@ set(mi_sources
src/prim/prim.c)
set(mi_cflags "")
set(mi_cflags_static "") # extra flags for a static library build
set(mi_cflags_dynamic "") # extra flags for a shared-object library build
set(mi_defines "")
set(mi_libraries "")
# -----------------------------------------------------------------------------
@ -83,6 +88,17 @@ endif()
# Process options
# -----------------------------------------------------------------------------
# put -Wall early so other warnings can be disabled selectively
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang")
list(APPEND mi_cflags -Wall -Wextra -Wpedantic)
endif()
if(CMAKE_C_COMPILER_ID MATCHES "GNU")
list(APPEND mi_cflags -Wall -Wextra)
endif()
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
list(APPEND mi_cflags -Wall)
endif()
if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel")
set(MI_USE_CXX "ON")
endif()
@ -128,7 +144,7 @@ endif()
if(MI_SECURE)
message(STATUS "Set full secure build (MI_SECURE=ON)")
list(APPEND mi_defines MI_SECURE=4)
list(APPEND mi_defines MI_SECURE=4)
endif()
if(MI_TRACK_VALGRIND)
@ -185,6 +201,10 @@ endif()
if(MI_SEE_ASM)
message(STATUS "Generate assembly listings (MI_SEE_ASM=ON)")
list(APPEND mi_cflags -save-temps)
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang")
message(STATUS "No GNU Line marker")
list(APPEND mi_cflags -Wno-gnu-line-marker)
endif()
endif()
if(MI_CHECK_FULL)
@ -247,7 +267,7 @@ if(MI_DEBUG_UBSAN)
message(WARNING "Can only use undefined-behavior sanitizer with clang++ (MI_DEBUG_UBSAN=ON but ignored)")
endif()
else()
message(WARNING "Can only use thread sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})")
message(WARNING "Can only use undefined-behavior sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})")
endif()
endif()
@ -263,31 +283,51 @@ if(MI_USE_CXX)
endif()
endif()
if(CMAKE_SYSTEM_NAME MATCHES "Haiku")
SET(CMAKE_INSTALL_LIBDIR ~/config/non-packaged/lib)
SET(CMAKE_INSTALL_INCLUDEDIR ~/config/non-packaged/headers)
endif()
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android")
if(MI_NO_THP)
message(STATUS "Disable transparent huge pages support (MI_NO_THP=ON)")
list(APPEND mi_defines MI_NO_THP=1)
endif()
endif()
if(MI_LIBC_MUSL)
message(STATUS "Assume using musl libc (MI_LIBC_MUSL=ON)")
list(APPEND mi_defines MI_LIBC_MUSL=1)
endif()
# On Haiku use `-DCMAKE_INSTALL_PREFIX` instead, issue #788
# if(CMAKE_SYSTEM_NAME MATCHES "Haiku")
# SET(CMAKE_INSTALL_LIBDIR ~/config/non-packaged/lib)
# SET(CMAKE_INSTALL_INCLUDEDIR ~/config/non-packaged/headers)
# endif()
# Compiler flags
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden)
list(APPEND mi_cflags -Wno-unknown-pragmas -fvisibility=hidden)
if(NOT MI_USE_CXX)
list(APPEND mi_cflags -Wstrict-prototypes)
endif()
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang")
list(APPEND mi_cflags -Wpedantic -Wno-static-in-inline)
list(APPEND mi_cflags -Wno-static-in-inline)
endif()
endif()
if(CMAKE_C_COMPILER_ID MATCHES "Intel")
list(APPEND mi_cflags -Wall -fvisibility=hidden)
list(APPEND mi_cflags -fvisibility=hidden)
endif()
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku")
if(MI_LOCAL_DYNAMIC_TLS)
list(APPEND mi_cflags -ftls-model=local-dynamic)
else()
list(APPEND mi_cflags -ftls-model=initial-exec)
if(MI_LIBC_MUSL)
# with musl we use local-dynamic for the static build, see issue #644
list(APPEND mi_cflags_static -ftls-model=local-dynamic)
list(APPEND mi_cflags_dynamic -ftls-model=initial-exec)
message(STATUS "Use local dynamic TLS for the static build (since MI_LIBC_MUSL=ON)")
else()
list(APPEND mi_cflags -ftls-model=initial-exec)
endif()
endif()
if(MI_OVERRIDE)
list(APPEND mi_cflags -fno-builtin-malloc)
@ -395,7 +435,7 @@ if(MI_BUILD_SHARED)
add_library(mimalloc SHARED ${mi_sources})
set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} SOVERSION ${mi_version_major} OUTPUT_NAME ${mi_basename} )
target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT)
target_compile_options(mimalloc PRIVATE ${mi_cflags})
target_compile_options(mimalloc PRIVATE ${mi_cflags} ${mi_cflags_dynamic})
target_link_libraries(mimalloc PRIVATE ${mi_libraries})
target_include_directories(mimalloc PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
@ -425,7 +465,7 @@ if (MI_BUILD_STATIC)
add_library(mimalloc-static STATIC ${mi_sources})
set_property(TARGET mimalloc-static PROPERTY POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB)
target_compile_options(mimalloc-static PRIVATE ${mi_cflags})
target_compile_options(mimalloc-static PRIVATE ${mi_cflags} ${mi_cflags_static})
target_link_libraries(mimalloc-static PRIVATE ${mi_libraries})
target_include_directories(mimalloc-static PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
@ -457,7 +497,7 @@ if (MI_BUILD_OBJECT)
add_library(mimalloc-obj OBJECT src/static.c)
set_property(TARGET mimalloc-obj PROPERTY POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(mimalloc-obj PRIVATE ${mi_defines})
target_compile_options(mimalloc-obj PRIVATE ${mi_cflags})
target_compile_options(mimalloc-obj PRIVATE ${mi_cflags} ${mi_cflags_static})
target_include_directories(mimalloc-obj PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${mi_install_incdir}>
@ -468,7 +508,7 @@ if (MI_BUILD_OBJECT)
set(mimalloc-obj-static "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/mimalloc-obj.dir/src/static.c${CMAKE_C_OUTPUT_EXTENSION}")
set(mimalloc-obj-out "${CMAKE_CURRENT_BINARY_DIR}/${mi_basename}${CMAKE_C_OUTPUT_EXTENSION}")
add_custom_command(OUTPUT ${mimalloc-obj-out} DEPENDS mimalloc-obj COMMAND "${CMAKE_COMMAND}" -E copy "${mimalloc-obj-static}" "${mimalloc-obj-out}")
add_custom_target(mimalloc-obj-target ALL DEPENDS ${mimalloc-obj-out})
add_custom_target(mimalloc-obj-target ALL DEPENDS ${mimalloc-obj-out})
endif()
# the following seems to lead to cmake warnings/errors on some systems, disable for now :-(

SECURITY.md (new file): 41 lines added

@ -0,0 +1,41 @@
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.9 BLOCK -->
## Security
Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.
## Reporting Security Issues
**Please do not report security vulnerabilities through public GitHub issues.**
Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue
This information will help us triage your report more quickly.
If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.
## Preferred Languages
We prefer all communications to be in English.
## Policy
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd).
<!-- END MICROSOFT SECURITY.MD BLOCK -->


@ -1,6 +1,6 @@
set(mi_version_major 1)
set(mi_version_minor 8)
set(mi_version_patch 2)
set(mi_version_patch 5)
set(mi_version ${mi_version_major}.${mi_version_minor})
set(PACKAGE_VERSION ${mi_version})


@ -466,7 +466,7 @@ LOOKUP_CACHE_SIZE = 0
# than 0 to get more control over the balance between CPU load and processing
# speed. At this moment only the input processing can be done using multiple
# threads. Since this is still an experimental feature the default is set to 1,
# which efficively disables parallel processing. Please report any issues you
# which effectively disables parallel processing. Please report any issues you
# encounter. Generating dot graphs in parallel is controlled by the
# DOT_NUM_THREADS setting.
# Minimum value: 0, maximum value: 32, default value: 1.


@ -168,7 +168,7 @@ void* mi_expand(void* p, size_t newsize);
/// @returns A pointer to a block of \a count * \a size bytes, or \a NULL
/// if out of memory or if \a count * \a size overflows.
///
/// If there is no overflow, it behaves exactly like `mi_malloc(p,count*size)`.
/// If there is no overflow, it behaves exactly like `mi_malloc(count*size)`.
/// @see mi_calloc()
/// @see mi_zallocn()
void* mi_mallocn(size_t count, size_t size);
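As an illustration, a minimal `mi_mallocn` usage sketch, assuming a program linked against mimalloc with `mimalloc.h` on the include path:

```c
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // behaves like mi_malloc(100 * sizeof(int)) since the multiplication does not overflow
  int* xs = (int*)mi_mallocn(100, sizeof(int));
  if (xs == NULL) return 1;   // NULL on out-of-memory, or if count*size would overflow
  for (int i = 0; i < 100; i++) xs[i] = i;
  printf("%d\n", xs[99]);
  mi_free(xs);
  return 0;
}
```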
@ -441,7 +441,7 @@ bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_la
/// @param pages The number of 1GiB pages to reserve.
/// @param numa_nodes The number of nodes to evenly divide the pages over, or 0 for using the actual number of NUMA nodes.
/// @param timeout_msecs Maximum number of milli-seconds to try reserving, or 0 for no timeout.
/// @returns 0 if successfull, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out.
/// @returns 0 if successful, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out.
///
/// The reserved memory is used by mimalloc to satisfy allocations.
/// May quit before \a timeout_msecs are expired if it estimates it will take more than
@ -455,7 +455,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
/// @param pages The number of 1GiB pages to reserve.
/// @param numa_node The NUMA node where the memory is reserved (start at 0).
/// @param timeout_msecs Maximum number of milli-seconds to try reserving, or 0 for no timeout.
/// @returns 0 if successfull, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out.
/// @returns 0 if successful, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out.
///
/// The reserved memory is used by mimalloc to satisfy allocations.
/// May quit before \a timeout_msecs are expired if it estimates it will take more than
@ -468,7 +468,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
/// Is the C runtime \a malloc API redirected?
/// @returns \a true if all malloc API calls are redirected to mimalloc.
///
/// Currenty only used on Windows.
/// Currently only used on Windows.
bool mi_is_redirected();
/// Return process information (time and memory usage).
@ -499,11 +499,11 @@ void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_m
/// \{
/// The maximum supported alignment size (currently 1MiB).
#define MI_ALIGNMENT_MAX (1024*1024UL)
#define MI_BLOCK_ALIGNMENT_MAX (1024*1024UL)
/// Allocate \a size bytes aligned by \a alignment.
/// @param size number of bytes to allocate.
/// @param alignment the minimal alignment of the allocated memory. Must be less than #MI_ALIGNMENT_MAX.
/// @param alignment the minimal alignment of the allocated memory. Must be less than #MI_BLOCK_ALIGNMENT_MAX.
/// @returns pointer to the allocated memory or \a NULL if out of memory.
/// The returned pointer is aligned by \a alignment, i.e.
/// `(uintptr_t)p % alignment == 0`.
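As an illustrative sketch (assuming a program linked against mimalloc), an aligned allocation within the `MI_BLOCK_ALIGNMENT_MAX` limit:

```c
#include <mimalloc.h>
#include <stdint.h>
#include <assert.h>

int main(void) {
  void* p = mi_malloc_aligned(1000, 64);          // 1000 bytes, 64-byte aligned (well below MI_BLOCK_ALIGNMENT_MAX)
  assert(p == NULL || ((uintptr_t)p % 64) == 0);  // returned pointer honors the requested alignment
  mi_free(p);
  return 0;
}
```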
@ -558,7 +558,7 @@ mi_heap_t* mi_heap_new();
/// Delete a previously allocated heap.
/// This will release resources and migrate any
/// still allocated blocks in this heap (efficienty)
/// still allocated blocks in this heap (efficiently)
/// to the default heap.
///
/// If \a heap is the default heap, the default
@ -888,7 +888,7 @@ void mi_free_aligned(void* p, size_t alignment);
///
/// Note: use the `mimalloc-new-delete.h` header to override the \a new
/// and \a delete operators globally. The wrappers here are mostly
/// for convience for library writers that need to interface with
/// for convenience for library writers that need to interface with
/// mimalloc from C++.
///
/// \{


@ -100,7 +100,7 @@ $(document).ready(function(){initNavTree('bench.html',''); initResizable(); });
<div class="contents">
<div class="textblock"><p>We tested <em>mimalloc</em> against many other top allocators over a wide range of benchmarks, ranging from various real world programs to synthetic benchmarks that see how the allocator behaves under more extreme circumstances.</p>
<p>In our benchmarks, <em>mimalloc</em> always outperforms all other leading allocators (<em>jemalloc</em>, <em>tcmalloc</em>, <em>Hoard</em>, etc) (Jan 2021), and usually uses less memory (up to 25% more in the worst case). A nice property is that it does <em>consistently</em> well over the wide range of benchmarks.</p>
<p>See the <a href="https://github.com/microsoft/mimalloc#Performance">Performance</a> section in the <em>mimalloc</em> repository for benchmark results, or the the technical report for detailed benchmark results. </p>
<p>See the <a href="https://github.com/microsoft/mimalloc#Performance">Performance</a> section in the <em>mimalloc</em> repository for benchmark results, or the technical report for detailed benchmark results. </p>
</div></div><!-- contents -->
</div><!-- PageDoc -->
</div><!-- doc-content -->


@ -217,6 +217,12 @@
<ClCompile Include="..\..\src\bitmap.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\free.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\libc.c" />


@ -58,6 +58,9 @@
<ClCompile Include="..\..\src\libc.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\free.c">
<Filter>Sources</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\src\bitmap.h">


@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_H
#define MIMALLOC_H
#define MI_MALLOC_VERSION 182 // major + 2 digits minor
#define MI_MALLOC_VERSION 185 // major + 2 digits minor
// ------------------------------------------------------
// Compiler specific attributes
@ -300,6 +300,8 @@ mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size
mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept;
mi_decl_export void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge) mi_attr_noexcept;
// Experimental: heaps associated with specific memory arenas
typedef int mi_arena_id_t;
mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size);
@ -341,41 +343,44 @@ mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size
typedef enum mi_option_e {
// stable options
mi_option_show_errors, // print error messages
mi_option_show_stats, // print statistics on termination
mi_option_verbose, // print verbose messages
// the following options are experimental (see src/options.h)
mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1)
mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2)
mi_option_purge_decommits, // should a memory purge decommit (or only reset) (=1)
mi_option_allow_large_os_pages, // allow large (2MiB) OS pages, implies eager commit
mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB/page) at startup
mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node
mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup
mi_option_show_errors, // print error messages
mi_option_show_stats, // print statistics on termination
mi_option_verbose, // print verbose messages
// advanced options
mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1)
mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2)
mi_option_purge_decommits, // should a memory purge decommit? (=1). Set to 0 to use memory reset on a purge (instead of decommit)
mi_option_allow_large_os_pages, // allow large (2 or 4 MiB) OS pages, implies eager commit. If false, also disables THP for the process.
mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB pages) at startup
mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node
mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup
mi_option_deprecated_segment_cache,
mi_option_deprecated_page_reset,
mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination
mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination
mi_option_deprecated_segment_reset,
mi_option_eager_commit_delay,
mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all.
mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes.
mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas)
mi_option_os_tag, // tag used for OS logging (macOS only for now)
mi_option_max_errors, // issue at most N error messages
mi_option_max_warnings, // issue at most N warning messages
mi_option_max_segment_reclaim,
mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe.
mi_option_arena_reserve, // initial memory size in KiB for arena reservation (1GiB on 64-bit)
mi_option_arena_purge_mult,
mi_option_eager_commit_delay, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. (=10)
mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes.
mi_option_disallow_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas)
mi_option_os_tag, // tag used for OS logging (macOS only for now) (=100)
mi_option_max_errors, // issue at most N error messages
mi_option_max_warnings, // issue at most N warning messages
mi_option_max_segment_reclaim, // max. percentage of the abandoned segments can be reclaimed per try (=10%)
mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe
mi_option_arena_reserve, // initial memory size in KiB for arena reservation (= 1 GiB on 64-bit)
mi_option_arena_purge_mult, // multiplier for `purge_delay` for the purging delay for arenas (=10)
mi_option_purge_extend_delay,
mi_option_remap_threshold, // size in KiB after which realloc uses OS in-place remap; use 0 to disable
mi_option_abandoned_reclaim_on_free, // allow to reclaim an abandoned segment on a free (=1)
mi_option_disallow_arena_alloc, // 1 = do not use arena's for allocation (except if using specific arena id's)
mi_option_remap_threshold, // size in KiB after which realloc uses OS in-place remap; use 0 to disable
_mi_option_last,
// legacy option names
mi_option_large_os_pages = mi_option_allow_large_os_pages,
mi_option_eager_region_commit = mi_option_arena_eager_commit,
mi_option_reset_decommits = mi_option_purge_decommits,
mi_option_reset_delay = mi_option_purge_delay,
mi_option_abandoned_page_reset = mi_option_abandoned_page_purge
mi_option_abandoned_page_reset = mi_option_abandoned_page_purge,
mi_option_limit_os_alloc = mi_option_disallow_os_alloc
} mi_option_t;
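To make the enum concrete, a hedged sketch of setting a few of these options programmatically; it assumes linking against mimalloc and that options are set before the first allocation for full effect (options can usually also be set through `MIMALLOC_`-prefixed environment variables):

```c
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  mi_option_set(mi_option_purge_delay, 0);    // purge immediately instead of after the default delay
  mi_option_enable(mi_option_show_stats);     // print statistics on termination
  printf("purge_delay = %ld\n", mi_option_get(mi_option_purge_delay));
  return 0;
}
```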
@ -518,7 +523,7 @@ template<class T, bool _mi_destroy> struct _mi_heap_stl_allocator_common : publi
using typename _mi_stl_allocator_common<T>::value_type;
using typename _mi_stl_allocator_common<T>::pointer;
_mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp) { } /* will not delete nor destroy the passed in heap */
_mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp, [](mi_heap_t*) {}) {} /* will not delete nor destroy the passed in heap */
#if (__cplusplus >= 201703L) // C++17
mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); }


@ -23,8 +23,10 @@ terms of the MIT license. A copy of the license can be found in the file
#define _Atomic(tp) std::atomic<tp>
#define mi_atomic(name) std::atomic_##name
#define mi_memory_order(name) std::memory_order_##name
#if !defined(ATOMIC_VAR_INIT) || (__cplusplus >= 202002L) // c++20, see issue #571
#define MI_ATOMIC_VAR_INIT(x) x
#if (__cplusplus >= 202002L) // c++20, see issue #571
#define MI_ATOMIC_VAR_INIT(x) x
#elif !defined(ATOMIC_VAR_INIT)
#define MI_ATOMIC_VAR_INIT(x) x
#else
#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
#endif
@ -39,7 +41,9 @@ terms of the MIT license. A copy of the license can be found in the file
#include <stdatomic.h>
#define mi_atomic(name) atomic_##name
#define mi_memory_order(name) memory_order_##name
#if !defined(ATOMIC_VAR_INIT) || (__STDC_VERSION__ >= 201710L) // c17, see issue #735
#if (__STDC_VERSION__ >= 201710L) // c17, see issue #735
#define MI_ATOMIC_VAR_INIT(x) x
#elif !defined(ATOMIC_VAR_INIT)
#define MI_ATOMIC_VAR_INIT(x) x
#else
#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
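As a standalone sketch of the same compatibility pattern (the `EXAMPLE_ATOMIC_INIT` macro is hypothetical, mirroring the logic above): C17 deprecates `ATOMIC_VAR_INIT`, so a plain initializer is preferred when available:

```c
#include <stdatomic.h>
#include <stdint.h>

#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201710L)  // C17: plain initializer
  #define EXAMPLE_ATOMIC_INIT(x)  x
#elif !defined(ATOMIC_VAR_INIT)                                  // macro not provided at all
  #define EXAMPLE_ATOMIC_INIT(x)  x
#else
  #define EXAMPLE_ATOMIC_INIT(x)  ATOMIC_VAR_INIT(x)
#endif

static _Atomic(uintptr_t) example_counter = EXAMPLE_ATOMIC_INIT(0);

int main(void) {
  atomic_fetch_add(&example_counter, 1);
  return (int)atomic_load(&example_counter) - 1;  // 0
}
```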
@ -129,7 +133,9 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
#elif defined(_MSC_VER)
// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <intrin.h>
#ifdef _WIN64
@ -323,7 +329,9 @@ static inline void mi_atomic_yield(void) {
std::this_thread::yield();
}
#elif defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
static inline void mi_atomic_yield(void) {
YieldProcessor();


@ -30,14 +30,17 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_decl_noinline __declspec(noinline)
#define mi_decl_thread __declspec(thread)
#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
#define mi_decl_weak
#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
#define mi_decl_noinline __attribute__((noinline))
#define mi_decl_thread __thread
#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
#define mi_decl_weak __attribute__((weak))
#else
#define mi_decl_noinline
#define mi_decl_thread __thread // hope for the best :-)
#define mi_decl_cache_align
#define mi_decl_weak
#endif
#if defined(__EMSCRIPTEN__) && !defined(__wasi__)
@ -88,7 +91,7 @@ void _mi_thread_data_collect(void);
// os.c
void _mi_os_init(void); // called from process init
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats);
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats);
void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats);
void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats);
@ -128,9 +131,21 @@ void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld);
bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id);
bool _mi_arena_contains(const void* p);
void _mi_arena_collect(bool force_purge, mi_stats_t* stats);
void _mi_arenas_collect(bool force_purge, mi_stats_t* stats);
void _mi_arena_unsafe_destroy_all(mi_stats_t* stats);
bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment);
void _mi_arena_segment_mark_abandoned(mi_segment_t* segment);
size_t _mi_arena_segment_abandoned_count(void);
typedef struct mi_arena_field_cursor_s { // abstract
mi_arena_id_t start;
int count;
size_t bitmap_idx;
} mi_arena_field_cursor_t;
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current);
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous);
// "segment-map.c"
void _mi_segment_map_allocated_at(const mi_segment_t* segment);
void _mi_segment_map_freed_at(const mi_segment_t* segment);
@ -139,7 +154,7 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment);
mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size);
#if MI_HUGE_PAGE_ABANDON
void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
@ -147,9 +162,10 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi
void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
#endif
void _mi_segment_thread_collect(mi_segments_tld_t* tld);
void _mi_segments_collect(bool force, mi_segments_tld_t* tld);
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
void _mi_abandoned_await_readers(void);
bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment);
mi_block_t* _mi_segment_huge_page_remap(mi_segment_t* segment, mi_page_t* page, mi_block_t* block, size_t newsize, mi_segments_tld_t* tld);
mi_block_t* _mi_segment_huge_page_expand(mi_segment_t* segment, mi_page_t* page, mi_block_t* block, size_t newsize, mi_segments_tld_t* tld);
@ -193,13 +209,15 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start);
mi_msecs_t _mi_clock_start(void);
// "alloc.c"
void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic`
void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic`
void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned`
void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned`
void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned`
void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p);
bool _mi_free_delayed_block(mi_block_t* block);
void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
void _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size);
// "libc.c"
@ -412,13 +430,19 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si
// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
// therefore we align one byte before `p`.
// We check for NULL afterwards on 64-bit systems to improve codegen for `mi_free`.
static inline mi_segment_t* _mi_ptr_segment(const void* p) {
mi_assert_internal(p != NULL);
return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
mi_segment_t* const segment = (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
#if MI_INTPTR_SIZE <= 4
return (p==NULL ? NULL : segment);
#else
return ((intptr_t)segment <= 0 ? NULL : segment);
#endif
}
// Segment belonging to a page
static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
mi_assert_internal(page!=NULL);
mi_segment_t* segment = _mi_ptr_segment(page);
mi_assert_internal(segment == NULL || page == &segment->pages[page->segment_idx]);
return segment;
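As a standalone illustration (with hypothetical local macros, not mimalloc's own) of why `_mi_ptr_segment` above masks `p - 1` rather than `p`:

```c
#include <stdint.h>
#include <stdio.h>

#define SEGMENT_SIZE  ((uintptr_t)1 << 22)          // 4 MiB, matching the defaults
#define SEGMENT_MASK  (SEGMENT_SIZE - 1)

int main(void) {
  uintptr_t segment = 10 * SEGMENT_SIZE;            // hypothetical huge segment start
  uintptr_t block   = segment + SEGMENT_SIZE;       // block aligned exactly at the next boundary
  uintptr_t naive   = block & ~SEGMENT_MASK;        // == block: one segment too far
  uintptr_t correct = (block - 1) & ~SEGMENT_MASK;  // == segment: rounds down into the containing segment
  printf("naive=0x%llx correct=0x%llx\n", (unsigned long long)naive, (unsigned long long)correct);
  return 0;
}
```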
@ -442,14 +466,15 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const
}
// Quick page start for initialized pages
static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
const size_t bsize = page->xblock_size;
mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0);
return _mi_segment_page_start(segment, page, bsize, page_size, NULL);
static inline uint8_t* mi_page_start(const mi_page_t* page) {
mi_assert_internal(page->page_start != NULL);
mi_assert_expensive(_mi_segment_page_start(_mi_page_segment(page),page,NULL) == page->page_start);
return page->page_start;
}
// Get the page containing the pointer
static inline mi_page_t* _mi_ptr_page(void* p) {
mi_assert_internal(p!=NULL);
return _mi_segment_page_of(_mi_ptr_segment(p), p);
}
@ -457,26 +482,16 @@ static inline bool mi_segment_is_huge(const mi_segment_t* segment) {
return (segment->page_kind == MI_PAGE_HUGE);
}
static inline bool mi_page_is_huge(const mi_page_t* page) {
bool huge = mi_segment_is_huge(_mi_page_segment(page));
mi_assert_internal((huge && page->xblock_size == MI_HUGE_BLOCK_SIZE) || (!huge && page->xblock_size <= MI_LARGE_OBJ_SIZE_MAX));
return huge;
}
// Get the block size of a page (special case for huge objects)
static inline size_t mi_page_block_size(const mi_page_t* page) {
const size_t bsize = page->xblock_size;
mi_assert_internal(bsize > 0);
if mi_likely(bsize < MI_HUGE_BLOCK_SIZE) {
mi_assert_internal(bsize <= MI_LARGE_OBJ_SIZE_MAX);
return bsize;
}
else {
mi_assert_internal(mi_page_is_huge(page));
size_t psize;
_mi_segment_page_start(_mi_page_segment(page), page, bsize, &psize, NULL);
return psize;
}
mi_assert_internal(page->block_size > 0);
return page->block_size;
}
static inline bool mi_page_is_huge(const mi_page_t* page) {
mi_assert_internal((page->is_huge && _mi_page_segment(page)->page_kind == MI_PAGE_HUGE) ||
(!page->is_huge && _mi_page_segment(page)->page_kind != MI_PAGE_HUGE));
return page->is_huge;
}
// Get the usable block size of a page without fixed padding.


@ -36,10 +36,10 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config );
// Free OS memory
int _mi_prim_free(void* addr, size_t size );
// Allocate OS memory. Return NULL on error.
// The `try_alignment` is just a hint and the returned pointer does not have to be aligned.
// If `commit` is false, the virtual memory range only needs to be reserved (with no access)
// If `commit` is false, the virtual memory range only needs to be reserved (with no access)
// which will later be committed explicitly using `_mi_prim_commit`.
// `is_zero` is set to true if the memory was zero initialized (as on most OS's)
// The `hint` address is either `NULL` or a preferred allocation address but can be ignored.
@ -104,11 +104,11 @@ mi_msecs_t _mi_prim_clock_now(void);
typedef struct mi_process_info_s {
mi_msecs_t elapsed;
mi_msecs_t utime;
mi_msecs_t stime;
size_t current_rss;
size_t peak_rss;
mi_msecs_t stime;
size_t current_rss;
size_t peak_rss;
size_t current_commit;
size_t peak_commit;
size_t peak_commit;
size_t page_faults;
} mi_process_info_t;
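For reference, a sketch of reading these fields through the public API; it assumes the 8-argument `mi_process_info` signature declared in `mimalloc.h`:

```c
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  size_t elapsed, user, sys, rss, peak_rss, commit, peak_commit, faults;
  mi_process_info(&elapsed, &user, &sys, &rss, &peak_rss, &commit, &peak_commit, &faults);
  printf("elapsed: %zu ms, peak rss: %zu, page faults: %zu\n", elapsed, peak_rss, faults);
  return 0;
}
```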
@ -139,7 +139,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
//-------------------------------------------------------------------
// Thread id: `_mi_prim_thread_id()`
//
//
// Getting the thread id should be performant as it is called in the
// fast path of `_mi_free` and we specialize for various platforms as
// inlined definitions. Regular code should call `init.c:_mi_thread_id()`.
@ -147,33 +147,24 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
// for each thread (unequal to zero).
//-------------------------------------------------------------------
// defined in `init.c`; do not use these directly
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
extern bool _mi_process_is_initialized; // has mi_process_init been called?
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
}
// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
// both the OS and libc implementation so we use specific tests for each main platform.
// On some libc + platform combinations we can directly access a thread-local storage (TLS) slot.
// The TLS layout depends on both the OS and libc implementation so we use specific tests for each main platform.
// If you test on another platform and it works please send a PR :-)
// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
#elif defined(__GNUC__) && ( \
//
// Note: we would like to prefer `__builtin_thread_pointer()` nowadays instead of using assembly,
// but unfortunately we can not detect support reliably (see issue #883)
// We also use it on Apple OS as we use a TLS slot for the default heap there.
#if defined(__GNUC__) && ( \
(defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
|| (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \
|| (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__) || defined(__POWERPC__))) \
|| (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
|| (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
|| (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
)
#define MI_HAS_TLS_SLOT
static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept {
void* res;
const size_t ofs = (slot*sizeof(void*));
@ -197,6 +188,9 @@ static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept {
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
#endif
res = tcb[slot];
#elif defined(__APPLE__) && defined(__POWERPC__) // ppc, issue #781
MI_UNUSED(ofs);
res = pthread_getspecific(slot);
#endif
return res;
}
@ -224,9 +218,52 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce
__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
#endif
tcb[slot] = value;
#elif defined(__APPLE__) && defined(__POWERPC__) // ppc, issue #781
MI_UNUSED(ofs);
pthread_setspecific(slot, value);
#endif
}
#endif
// Do we have __builtin_thread_pointer? This would be the preferred way to get a unique thread id
// but unfortunately, it seems we cannot test for this reliably at this time (see issue #883)
// Nevertheless, it seems needed on older graviton platforms (see issue #851).
// For now, we only enable this for specific platforms.
#if defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__) /* special case aarch64 for older gcc versions (issue #851) */ \
&& !defined(__APPLE__) /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly (<https://github.com/microsoft/mimalloc/issues/343#issuecomment-763272369>)*/ \
&& (!defined(__clang_major__) || __clang_major__ >= 14) /* older clang versions emit bad code; fall back to using the TLS slot (<https://lore.kernel.org/linux-arm-kernel/202110280952.352F66D8@keescook/T/>) */
#define MI_USE_BUILTIN_THREAD_POINTER 1
#endif
// defined in `init.c`; do not use these directly
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
extern bool _mi_process_is_initialized; // has mi_process_init been called?
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
// Get a unique id for the current thread.
#if defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
}
#elif MI_USE_BUILTIN_THREAD_POINTER
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Works on most Unix based platforms with recent compilers
return (uintptr_t)__builtin_thread_pointer();
}
#elif defined(MI_HAS_TLS_SLOT)
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
#if defined(__BIONIC__)
// issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
@ -273,7 +310,6 @@ static inline mi_heap_t* mi_prim_get_default_heap(void);
#if defined(MI_MALLOC_OVERRIDE)
#if defined(__APPLE__) // macOS
#define MI_TLS_SLOT 89 // seems unused?
// #define MI_TLS_RECURSE_GUARD 1
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
#elif defined(__OpenBSD__)
@ -291,6 +327,9 @@ static inline mi_heap_t* mi_prim_get_default_heap(void);
#if defined(MI_TLS_SLOT)
# if !defined(MI_HAS_TLS_SLOT)
# error "trying to use a TLS slot for the default heap, but the mi_prim_tls_slot primitives are not defined"
# endif
static inline mi_heap_t* mi_prim_get_default_heap(void) {
mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT);


@ -83,7 +83,9 @@ defined, undefined, or not accessible at all:
#define MI_TRACK_HEAP_DESTROY 1
#define MI_TRACK_TOOL "ETW"
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include "../src/prim/windows/etw.h"


@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -16,6 +16,8 @@ terms of the MIT license. A copy of the license can be found in the file
// are allocated.
// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from
// where objects are allocated.
// Note: we write "OS page" for OS memory pages while
// using plain "page" for mimalloc pages (`mi_page_t`).
// --------------------------------------------------------------------------
@ -89,10 +91,11 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
// We used to abandon huge pages but to eagerly deallocate if freed from another thread,
// but that makes it not possible to visit them during a heap walk or include them in a
// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks if freed from
// another thread so most memory is available until it gets properly freed by the owning thread.
// We used to abandon huge pages in order to eagerly deallocate it if freed from another thread.
// Unfortunately, that makes it not possible to visit them during a heap walk or include them in a
// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks nowadays if freed from
// another thread so the memory becomes "virtually" available (and eventually gets properly freed by
// the owning thread).
// #define MI_HUGE_PAGE_ABANDON 1
@ -157,10 +160,18 @@ typedef int32_t mi_ssize_t;
// Main tuning parameters for segment and page sizes
// Sizes for 64-bit, divide by two for 32-bit
#ifndef MI_SMALL_PAGE_SHIFT
#define MI_SMALL_PAGE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB
#endif
#ifndef MI_MEDIUM_PAGE_SHIFT
#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB
#endif
#ifndef MI_LARGE_PAGE_SHIFT
#define MI_LARGE_PAGE_SHIFT ( 3 + MI_MEDIUM_PAGE_SHIFT) // 4MiB
#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4MiB
#endif
#ifndef MI_SEGMENT_SHIFT
#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4MiB -- must be equal to `MI_LARGE_PAGE_SHIFT`
#endif
// Derived constants
#define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT)
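As a quick standalone check of the derived sizes on a 64-bit build (assuming `MI_INTPTR_SHIFT == 3` and the default shifts above):

```c
#include <assert.h>
#include <stddef.h>

int main(void) {
  const size_t small_page  = (size_t)1 << (13 + 3);          // 64 KiB
  const size_t medium_page = (size_t)1 << (3 + 13 + 3);      // 512 KiB
  const size_t large_page  = (size_t)1 << (3 + 3 + 13 + 3);  // 4 MiB, equal to the segment size
  assert(small_page  == 64 * 1024);
  assert(medium_page == 512 * 1024);
  assert(large_page  == 4 * 1024 * 1024);
  return 0;
}
```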
@ -181,7 +192,6 @@ typedef int32_t mi_ssize_t;
#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB
#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2MiB
#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
#define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)
// Maximum number of size classes. (spaced exponentially in 12.5% increments)
#define MI_BIN_HUGE (73U)
@ -190,18 +200,20 @@ typedef int32_t mi_ssize_t;
#error "mimalloc internal: define more bins"
#endif
// Used as a special value to encode block sizes in 32 bits.
#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX)
// Maximum block size for which blocks are guaranteed to be block size aligned. (see `segment.c:_mi_segment_page_start`)
#define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX)
// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments
#define MI_ALIGN_HUGE (MI_SEGMENT_SIZE >> 1)
// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments
#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
// We use special alignments internally to allocate remappable and expandable memory
#define MI_ALIGN_REMAP (MI_ALIGN_HUGE - 1)
#define MI_ALIGN_EXPAND_MAX (MI_ALIGN_HUGE - 2)
#define MI_ALIGN_EXPAND_MIN (1)
#define MI_EXPAND_INCREMENT (MI_MiB)
#define MI_ALIGN_REMAP (MI_BLOCK_ALIGNMENT_MAX - 1)
#define MI_ALIGN_EXPAND_MAX (MI_BLOCK_ALIGNMENT_MAX - 2)
#define MI_ALIGN_EXPAND_MIN (1)
#define MI_EXPAND_INCREMENT (MI_MiB)
// We never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX
// ------------------------------------------------------
// Mimalloc pages contain allocated blocks
@ -225,7 +237,7 @@ typedef enum mi_delayed_e {
MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list
MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap
MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list
MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim
MI_NEVER_DELAYED_FREE = 3 // sticky: used for abandoned pages without an owning heap; this only resets on page reclaim
} mi_delayed_t;
@ -264,7 +276,6 @@ typedef uintptr_t mi_thread_free_t;
// implement a monotonic heartbeat. The `thread_free` list is needed for
// avoiding atomic operations in the common case.
//
//
// `used - |thread_free|` == actual blocks that are in use (alive)
// `used - |thread_free| + |free| + |local_free| == capacity`
//
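One worked instance of these invariants, with made-up counts:

```c
#include <assert.h>

int main(void) {
  // hypothetical page state: 16 blocks total, 5 on `free`, 2 on `local_free`, 3 on `thread_free`
  int capacity = 16, free_len = 5, local_free_len = 2, thread_free_len = 3, used = 12;
  int alive = used - thread_free_len;                     // 9 blocks actually in use
  assert(alive + free_len + local_free_len == capacity);  // used - |thread_free| + |free| + |local_free| == capacity
  return 0;
}
```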
@ -272,16 +283,13 @@ typedef uintptr_t mi_thread_free_t;
// the number of memory accesses in the `mi_page_all_free` function(s).
//
// Notes:
// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`)
// - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
// - Using `uint16_t` does not seem to slow things down
// - The size is 8 words on 64-bit which helps the page index calculations
// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
// and 12 are still good for address calculation)
// - To limit the structure size, the `xblock_size` is 32-bits only; for
// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size
// - `thread_free` uses the bottom bits as a delayed-free flags to optimize
// - The size is 10 words on 64-bit which helps the page index calculations
// (and 12 words on 32-bit, and encoded free lists add 2 words)
// - `xthread_free` uses the bottom bits as a delayed-free flags to optimize
// concurrent frees where only the first concurrent free adds to the owning
// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`).
// heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`).
// The invariant is that no-delayed-free is only set if there is
// at least one block that will be added, or as already been added, to
// the owning heap `thread_delayed_free` list. This guarantees that pages
@ -292,6 +300,7 @@ typedef struct mi_page_s {
uint8_t segment_in_use:1; // `true` if the segment allocated this page
uint8_t is_committed:1; // `true` if the page virtual memory is committed
uint8_t is_zero_init:1; // `true` if the page was initially zero initialized
uint8_t is_huge:1; // `true` if the page is in a huge segment
// layout like this to optimize access in `mi_malloc` and `mi_free`
uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
@ -301,9 +310,12 @@ typedef struct mi_page_s {
uint8_t retire_expire:7; // expiration count for retired blocks
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
uint32_t xblock_size; // size available in each block (always `>0`)
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
uint16_t used; // number of blocks in use (including blocks in `thread_free`)
uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
// padding
size_t block_size; // size available in each block (always `>0`)
uint8_t* page_start; // start of the page area containing the blocks
#if (MI_ENCODE_FREELIST || MI_PADDING)
uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
@ -312,8 +324,12 @@ typedef struct mi_page_s {
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
_Atomic(uintptr_t) xheap;
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
struct mi_page_s* next; // next page owned by the heap with the same `block_size`
struct mi_page_s* prev; // previous page owned by the heap with the same `block_size`
#if MI_INTPTR_SIZE==4 // pad to 12 words on 32-bit
void* padding[1];
#endif
} mi_page_t;
@ -326,17 +342,22 @@ typedef enum mi_page_kind_e {
MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment
MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages inside a segment
MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment
MI_PAGE_HUGE // huge blocks (>512KiB) are put into a single page in a segment of the exact size (but still 2MiB aligned)
MI_PAGE_HUGE // a huge page is a single page in a segment of variable size (but still 2MiB aligned)
// used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an alignment `> MI_BLOCK_ALIGNMENT_MAX`.
} mi_page_kind_t;
// ---------------------------------------------------------------
// a memory id tracks the provenance of arena/OS allocated memory
// ---------------------------------------------------------------
// Memory can reside in arenas, be directly OS allocated, or be statically allocated. The memid keeps track of this.
typedef enum mi_memkind_e {
MI_MEM_NONE, // not allocated
MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example)
MI_MEM_OS, // allocated from the OS
MI_MEM_OS_HUGE, // allocated as huge os pages
MI_MEM_OS_HUGE, // allocated as huge os pages (usually 1GiB, pinned to physical memory)
MI_MEM_OS_EXPAND, // allocated in an expandable area
MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`)
MI_MEM_ARENA // allocated from an arena (the usual case)
@ -356,7 +377,7 @@ typedef struct mi_memid_os_info {
typedef struct mi_memid_arena_info {
size_t block_index; // index in the arena
mi_arena_id_t id; // arena id (>= 1)
bool is_exclusive; // the arena can only be used for specific arena allocations
bool is_exclusive; // this arena can only be used for specific arena allocations
} mi_memid_arena_info_t;
typedef struct mi_memid_s {
@ -364,27 +385,30 @@ typedef struct mi_memid_s {
mi_memid_os_info_t os; // only used for MI_MEM_OS
mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
} mem;
bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages)
bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2MiB) or huge (1GiB) OS pages)
bool initially_committed;// `true` if the memory was originally allocated as committed
bool initially_zero; // `true` if the memory was originally zero initialized
mi_memkind_t memkind;
} mi_memid_t;
// Segments are large allocated memory blocks (2MiB on 64 bit) from
// the OS. Inside segments we allocated fixed size _pages_ that
// contain blocks.
// ---------------------------------------------------------------
// Segments contain mimalloc pages
// ---------------------------------------------------------------
// Segments are large allocated memory blocks (2MiB on 64 bit) from the OS.
// Inside segments we allocate fixed size _pages_ that contain blocks.
typedef struct mi_segment_s {
// constant fields
mi_memid_t memid; // id for the os-level memory manager
mi_memid_t memid; // memory id to track provenance
bool allow_decommit;
bool allow_purge;
size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE`
// segment fields
_Atomic(struct mi_segment_s*) abandoned_next;
struct mi_segment_s* next; // must be the first segment field after abandoned_next -- see `segment.c:segment_init`
struct mi_segment_s* prev;
bool was_reclaimed; // true if it was reclaimed (used to limit on-free reclamation)
size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim if it is too long)
@ -395,8 +419,8 @@ typedef struct mi_segment_s {
uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie`
// layout like this to optimize access in `mi_free`
size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
_Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
mi_page_kind_t page_kind; // kind of pages: small, medium, large, or huge
mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages
} mi_segment_t;
@ -455,11 +479,9 @@ typedef struct mi_padding_s {
// A heap owns a set of pages.
struct mi_heap_s {
mi_tld_t* tld;
mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
_Atomic(mi_block_t*) thread_delayed_free;
mi_threadid_t thread_id; // thread this heap belongs too
mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0)
mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0)
uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list
mi_random_ctx_t random; // random number context used for secure allocation
@ -468,6 +490,8 @@ struct mi_heap_s {
size_t page_retired_max; // largest retired index into the `pages` array.
mi_heap_t* next; // list of heaps per thread
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages
mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
};
@ -555,7 +579,9 @@ typedef struct mi_stats_s {
mi_stat_counter_t searches;
mi_stat_counter_t normal_count;
mi_stat_counter_t huge_count;
mi_stat_counter_t giant_count;
mi_stat_counter_t arena_count;
mi_stat_counter_t arena_crossover_count;
mi_stat_counter_t arena_rollback_count;
#if MI_STAT>1
mi_stat_count_t normal_bins[MI_BIN_HUGE+1];
#endif
@ -580,6 +606,7 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount)
#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
// ------------------------------------------------------
// Thread Local data
// ------------------------------------------------------
@ -607,6 +634,7 @@ typedef struct mi_segments_tld_s {
size_t peak_count; // peak number of segments
size_t current_size; // current size of all segments
size_t peak_size; // peak size of all segments
size_t reclaim_count;// number of reclaimed (abandoned) segments
mi_stats_t* stats; // points to tld stats
mi_os_tld_t* os; // points to os stats
} mi_segments_tld_t;


@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac
Initially developed by Daan Leijen for the runtime systems of the
[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
Latest release tag: `v2.1.2` (2023-04-24).
Latest stable tag: `v1.8.2` (2023-04-24).
Latest release tag: `v2.1.4` (2024-04-22).
Latest v1 tag: `v1.8.4` (2024-04-22).
mimalloc is a drop-in replacement for `malloc` and can be used in other programs
without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
@ -72,14 +72,26 @@ Enjoy!
* `master`: latest stable release (based on `dev-slice`).
* `dev`: development branch for mimalloc v1. Use this branch for submitting PR's.
* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev`.
* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for
`src/segment.c`)
### Releases
Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to use reduce memory usage
Note: the `v2.x` version has a different algorithm for managing internal mimalloc pages (as slices) that tends to reduce
memory usage
and fragmentation compared to mimalloc `v1.x` (especially for large workloads). It should otherwise have similar performance
(see [below](#performance)); please report if you observe any significant performance regression.
* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds.
Freeing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size
directly available (and a new `block_size_shift` to improve aligned block freeing).
New approach to collection of abandoned segments: when
a thread terminates, the segments it owns are abandoned (still containing live objects) and these can be
reclaimed by other threads. We no longer use a list of abandoned segments; this is now done using bitmaps in arenas,
which is more concurrent (and more aggressive). Abandoned memory can now also be reclaimed if a thread frees an object in
an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). The option `mi_option_max_segment_reclaim`
gives the maximum percentage of abandoned segments that can be reclaimed per try (10% by default); a short sketch of
setting these options programmatically is shown after this list.
* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on FreeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity
by removing regions and segment caches and only using arenas with improved memory purging -- this may improve memory
usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking.
@ -91,7 +103,7 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page
abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes.
* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support.
Support abitrary large alignments (in particular for `std::pmr` pools).
Support arbitrary large alignments (in particular for `std::pmr` pools).
Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev).
Heap walks now visit all object (including huge objects). Support Windows nano server containers (by Johannes Schindelin,@dscho).
Various small bug fixes.
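As an aside, here is a minimal sketch of tuning the new reclamation behaviour from code with `mi_option_set`; the two
option names are taken from the `v1.8.4`/`v2.1.4` notes above, while the concrete values are only illustrative:

```c
#include <mimalloc.h>

// call this early, before worker threads start allocating
static void configure_abandoned_reclaim(void) {
  mi_option_set(mi_option_abandoned_reclaim_on_free, 0); // do not reclaim a segment when freeing into one of its abandoned pages
  mi_option_set(mi_option_max_segment_reclaim, 25);      // reclaim at most 25% of abandoned segments per try (instead of the default 10%)
}
```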
@ -144,7 +156,7 @@ mimalloc is used in various large scale low-latency services and programs, for e
## Windows
Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build.
Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build.
The `mimalloc` project builds a static library (in `out/msvc-x64`), while the
`mimalloc-override` project builds a DLL for overriding malloc
in the entire program.
@ -224,7 +236,7 @@ target_link_libraries(myapp PUBLIC mimalloc-static)
to link with the static library. See `test\CMakeLists.txt` for an example.
For best performance in C++ programs, it is also recommended to override the
global `new` and `delete` operators. For convience, mimalloc provides
global `new` and `delete` operators. For convenience, mimalloc provides
[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project.
In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator`
interface.
@ -280,17 +292,23 @@ You can set further options either programmatically (using [`mi_option_set`](htt
Advanced options:
- `MIMALLOC_ARENA_EAGER_COMMIT=2`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc
allocates segments and pages. Set this to 2 (default) to
only enable this on overcommit systems (e.g. Linux). Set this to 1 to enable explicitly on other systems
as well (like Windows or macOS) which may improve performance (as the whole arena is committed at once).
Note that eager commit only increases the commit but not the actual the peak resident set
(rss) so it is generally ok to enable this.
- `MIMALLOC_PURGE_DELAY=N`: the delay in `N` milli-seconds (by default `10`) after which mimalloc will purge
OS pages that are not in use. This signals to the OS that the underlying physical memory can be reused which
can reduce memory fragmentation especially in long running (server) programs. Setting `N` to `0` purges immediately when
a page becomes unused which can improve memory usage but also decreases performance. Setting `N` to a higher
value like `100` can improve performance (sometimes by a lot) at the cost of potentially using more memory at times.
Setting it to `-1` disables purging completely.
- `MIMALLOC_ARENA_EAGER_COMMIT=1`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc
allocates segments and pages. This is by default
only enabled on overcommit systems (e.g. Linux) but enabling it explicitly on other systems (like Windows or macOS)
may improve performance. Note that eager commit only increases the commit but not the actual peak resident set
(rss) so it is generally ok to enable this.
Setting it to `-1` disables purging completely.
- `MIMALLOC_PURGE_DECOMMITS=1`: By default "purging" memory means unused memory is decommitted (`MEM_DECOMMIT` on Windows,
`MADV_DONTNEED` (which decreases rss immediately) on `mmap` systems). Set this to 0 to instead "reset" unused
memory on a purge (`MEM_RESET` on Windows, generally `MADV_FREE` (which does not decrease rss immediately) on `mmap` systems).
Mimalloc generally does not "free" OS memory but only "purges" OS memory, in other words, it tries to keep virtual
address ranges and decommits within those ranges (to make the underlying physical memory available to other processes).
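These environment variables also have programmatic counterparts through `mi_option_set`. A rough sketch is below; the
`mi_option_*` enum names (and the `mi_option_get` accessor) are assumed to follow the usual `MIMALLOC_*` naming, so
check `mimalloc.h` for the exact spelling:

```c
#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  mi_option_set(mi_option_arena_eager_commit, 1); // commit arenas eagerly, even on non-overcommit systems
  mi_option_set(mi_option_purge_delay, 100);      // purge unused OS pages after 100ms (0 = immediately, -1 = never)
  mi_option_set(mi_option_purge_decommits, 0);    // "reset" (MEM_RESET/MADV_FREE) instead of decommit on a purge
  printf("purge delay: %ld ms\n", mi_option_get(mi_option_purge_delay));
  return 0;
}
```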
Further options for large workloads and services:
@ -298,9 +316,10 @@ Further options for large workloads and services:
at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than
the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA
nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed).
- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly
improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2 or 4MiB) when available; for some workloads this can significantly
improve performance. When this option is disabled, it also disables transparent huge pages (THP) for the process
(on Linux and Android). Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
to explicitly give permissions for large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that
can have fragmented memory (for that reason, we generally recommend using `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible).
- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where `N` is the number of 1GiB _huge_ OS pages. This reserves the huge pages at
@ -309,11 +328,12 @@ Further options for large workloads and services:
OS pages, use with care as reserving
contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at
startup only once).
Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])).
Note that we usually need to explicitly give permission for huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]).
With huge OS pages, it may be beneficial to set
`MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
and allocate just a little to take up space in the huge OS page area (which cannot be purged).
and allocate just a little to take up space in the huge OS page area (which cannot be purged as huge OS pages are pinned
to physical memory).
The huge pages are usually allocated evenly among NUMA nodes.
We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all
the huge pages at a specific numa node instead.
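The huge page reservation can also be requested from code instead of through `MIMALLOC_RESERVE_HUGE_OS_PAGES*`. A
hedged sketch, assuming the `mi_reserve_huge_os_pages_interleave`/`mi_reserve_huge_os_pages_at` entry points declared
in `mimalloc.h` (not shown in this diff), both of which return 0 on success:

```c
#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  // reserve 4 x 1GiB huge pages interleaved over all NUMA nodes, waiting at most 2000ms
  if (mi_reserve_huge_os_pages_interleave(4, 0 /* all numa nodes */, 2000) != 0) {
    fprintf(stderr, "unable to reserve huge OS pages\n");
  }
  // or: reserve 2 huge pages on NUMA node 0 only
  // mi_reserve_huge_os_pages_at(2, 0, 1000);
  return 0;
}
```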

View file

@ -15,25 +15,24 @@ terms of the MIT license. A copy of the license can be found in the file
// Aligned Allocation
// ------------------------------------------------------
// Fallback primitive aligned allocation -- split out for better codegen
static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) {
// objects up to `MI_MAX_ALIGN_GUARANTEE` are allocated aligned to their size (see `segment.c:_mi_segment_page_start`).
mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0));
if (alignment > size) return false;
if (alignment <= MI_MAX_ALIGN_SIZE) return true;
const size_t bsize = mi_good_size(size);
return (bsize <= MI_MAX_ALIGN_GUARANTEE && (bsize & (alignment-1)) == 0);
}
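// Illustration (not part of the change; it assumes the usual 8-byte size bins in a release build):
// for size 64 with alignment 32 we have 32 > MI_MAX_ALIGN_SIZE, mi_good_size(64) == 64, and (64 & 31) == 0,
// so -- provided 64 <= MI_MAX_ALIGN_GUARANTEE -- the regular allocation path already returns a 32-byte
// aligned block and the over-allocating fallback below is never taken.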
// Fallback aligned allocation that over-allocates -- split out for better codegen
static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
{
mi_assert_internal(size <= PTRDIFF_MAX);
mi_assert_internal(size <= (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE));
mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)`
const size_t padsize = size + MI_PADDING_SIZE;
// use regular allocation if it is guaranteed to fit the alignment constraints
if (offset == 0 && alignment <= padsize && padsize <= MI_MEDIUM_OBJ_SIZE_MAX && (padsize & align_mask) == 0) {
void* p = _mi_heap_malloc_zero(heap, size, zero);
mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
return p;
}
void* p;
size_t oversize;
if mi_unlikely(alignment >= MI_ALIGN_HUGE) {
if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) {
// use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
// This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the
// first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down)
@ -47,7 +46,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block
// zero afterwards as only the area from the aligned_p may be committed!
if (p == NULL) return NULL;
if (p == NULL) return NULL;
}
else {
// otherwise over-allocate
@ -57,6 +56,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
}
// .. and align within the allocation
const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)`
const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask;
const uintptr_t adjust = (poffset == 0 ? 0 : alignment - poffset);
mi_assert_internal(adjust < alignment);
@ -69,14 +69,14 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
// todo: expand padding if overallocated ?
mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size);
mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p));
mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_page(aligned_p), aligned_p));
mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0);
mi_assert_internal(mi_usable_size(aligned_p)>=size);
mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust);
// now zero the block if needed
if (alignment >= MI_ALIGN_HUGE) {
// for the tracker, on huge aligned allocations only the memory from the start of the large block is defined
if (alignment > MI_BLOCK_ALIGNMENT_MAX) {
// for the tracker, on huge aligned allocations only the memory from the start of the large block is defined
mi_track_mem_undefined(aligned_p, size);
if (zero) {
_mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p));
@ -85,10 +85,43 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
if (p != aligned_p) {
mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p));
}
}
return aligned_p;
}
// Generic primitive aligned allocation -- split out for better codegen
static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_generic(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
{
mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
// we don't allocate more than MI_MAX_ALLOC_SIZE (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) {
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
#endif
return NULL;
}
// use regular allocation if it is guaranteed to fit the alignment constraints.
// this is important to try as the fast path in `mi_heap_malloc_zero_aligned` only works when there exists
// a page with the right block size, and if we always use the over-alloc fallback that would never happen.
if (offset == 0 && mi_malloc_is_naturally_aligned(size,alignment)) {
void* p = _mi_heap_malloc_zero(heap, size, zero);
mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0;
if mi_likely(is_aligned_or_null) {
return p;
}
else {
// this should never happen if the `mi_malloc_is_naturally_aligned` check is correct..
mi_assert(false);
mi_free(p);
}
}
// fall back to over-allocation
return mi_heap_malloc_zero_aligned_at_overalloc(heap,size,alignment,offset,zero);
}
// Primitive aligned allocation
static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
{
@ -99,34 +132,30 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
#endif
return NULL;
}
if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
#endif
return NULL;
}
const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)`
const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check
// try first if there happens to be a small block available with just the right alignment
if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) {
if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) {
const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)`
const size_t padsize = size + MI_PADDING_SIZE;
mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize);
const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0;
if mi_likely(page->free != NULL && is_aligned)
{
#if MI_STAT>1
mi_heap_stat_increase(heap, malloc, size);
#endif
void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc
mi_assert_internal(p != NULL);
mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
mi_track_malloc(p,size,zero);
return p;
if mi_likely(page->free != NULL) {
const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0;
if mi_likely(is_aligned)
{
#if MI_STAT>1
mi_heap_stat_increase(heap, malloc, size);
#endif
void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen
mi_assert_internal(p != NULL);
mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
mi_track_malloc(p,size,zero);
return p;
}
}
}
// fallback
return mi_heap_malloc_zero_aligned_at_fallback(heap, size, alignment, offset, zero);
// fallback to generic aligned allocation
return mi_heap_malloc_zero_aligned_at_generic(heap, size, alignment, offset, zero);
}
@ -139,22 +168,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he
}
mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
if (alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL;
#if !MI_PADDING
// without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`)
if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX)
#else
// with padding, we can only guarantee this for fixed alignments
if mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2)))
&& size <= MI_SMALL_SIZE_MAX)
#endif
{
// fast path for common alignment and size
return mi_heap_malloc_small(heap, size);
}
else {
return mi_heap_malloc_aligned_at(heap, size, alignment, 0);
}
return mi_heap_malloc_aligned_at(heap, size, alignment, 0);
}
// ------------------------------------------------------

View file

@ -23,7 +23,7 @@ mi_decl_externc size_t malloc_good_size(size_t size);
#endif
// helper definition for C override of C++ new
typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
typedef void* mi_nothrow_t;
// ------------------------------------------------------
// Override system malloc
@ -77,7 +77,9 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
MI_INTERPOSE_MI(calloc),
MI_INTERPOSE_MI(realloc),
MI_INTERPOSE_MI(strdup),
#if defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7
MI_INTERPOSE_MI(strndup),
#endif
MI_INTERPOSE_MI(realpath),
MI_INTERPOSE_MI(posix_memalign),
MI_INTERPOSE_MI(reallocf),
@ -128,11 +130,19 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
// cannot override malloc unless using a dll.
// we just override new/delete which does work in a static library.
#else
// On all other systems forward to our API
// On all other systems forward allocation primitives to our API
mi_decl_export void* malloc(size_t size) MI_FORWARD1(mi_malloc, size)
mi_decl_export void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n)
mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize)
mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p)
mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p)
// In principle we do not need to forward `strdup`/`strndup` but on some systems these do not use `malloc` internally (but a more primitive call)
// We only override if `strdup` is not a macro (as on some older libc's, see issue #885)
#if !defined(strdup)
mi_decl_export char* strdup(const char* str) MI_FORWARD1(mi_strdup, str)
#endif
#if !defined(strndup) && (!defined(__APPLE__) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7))
mi_decl_export char* strndup(const char* str, size_t n) MI_FORWARD2(mi_strndup, str, n)
#endif
#endif
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__)
@ -192,11 +202,17 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
void _ZdaPv(void* p) MI_FORWARD0(mi_free,p) // delete[]
void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n)
void _ZdaPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n)
void _ZdlPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); }
void _ZdaPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); }
void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); }
void _ZdaPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); }
void _ZdlPvRKSt9nothrow_t(void* p, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free(p); } // operator delete(void*, std::nothrow_t const&)
void _ZdaPvRKSt9nothrow_t(void* p, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free(p); } // operator delete[](void*, std::nothrow_t const&)
void _ZdlPvSt11align_val_tRKSt9nothrow_t(void* p, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free_aligned(p,al); } // operator delete(void*, std::align_val_t, std::nothrow_t const&)
void _ZdaPvSt11align_val_tRKSt9nothrow_t(void* p, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free_aligned(p,al); } // operator delete[](void*, std::align_val_t, std::nothrow_t const&)
#if (MI_INTPTR_SIZE==8)
void* _Znwm(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit
void* _Znam(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit
@ -259,10 +275,11 @@ extern "C" {
// no forwarding here due to aliasing/name mangling issues
void cfree(void* p) { mi_free(p); }
void* pvalloc(size_t size) { return mi_pvalloc(size); }
void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); }
int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); }
void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); }
void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); }
// some systems define reallocarr so mark it as a weak symbol (#751)
mi_decl_weak int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); }
#if defined(__wasi__)
// forward __libc interface (see PR #667)

View file

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2022, Microsoft Research, Daan Leijen
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -18,6 +18,7 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_IN_ALLOC_C
#include "alloc-override.c"
#include "free.c"
#undef MI_IN_ALLOC_C
// ------------------------------------------------------
@ -42,6 +43,25 @@ static void mi_padding_init(mi_page_t* page, mi_block_t* block, size_t size /* b
}
#endif
}
/*
#if MI_PADDING // && !MI_TRACK_ENABLED
mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page));
ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE));
#if (MI_DEBUG>=2)
mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta));
#endif
mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess
padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys));
padding->delta = (uint32_t)(delta);
#if MI_PADDING_CHECK
if (!mi_page_is_huge(page)) {
uint8_t* fill = (uint8_t*)padding - delta;
const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes
for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; }
}
#endif
#endif
*/
#else
static void mi_padding_init(mi_page_t* page, mi_block_t* block, size_t size) {
MI_UNUSED(page); MI_UNUSED(block); MI_UNUSED(size);
@ -50,16 +70,18 @@ static void mi_padding_init(mi_page_t* page, mi_block_t* block, size_t size) {
// Fast allocation in a page: just pop from the free list.
// Fall back to generic allocation only if the list is empty.
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept {
mi_assert_internal(page->xblock_size == 0 || mi_page_block_size(page) >= size);
// Note: in release mode the (inlined) routine is about 7 instructions with a single test.
extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
{
mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size);
mi_block_t* const block = page->free;
if mi_unlikely(block == NULL) {
return _mi_malloc_generic(heap, size, zero, 0);
}
mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
// pop from the free list
page->used++;
page->free = mi_block_next(page, block);
page->used++;
mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
#if MI_DEBUG>3
if (page->free_is_zero) {
@ -74,41 +96,49 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
// zero the block? note: we need to zero the full block size (issue #63)
if mi_unlikely(zero) {
mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic)
mi_assert_internal(page->xblock_size >= MI_PADDING_SIZE);
mi_assert_internal(page->block_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic)
mi_assert_internal(page->block_size >= MI_PADDING_SIZE);
if (page->free_is_zero) {
block->next = 0;
mi_track_mem_defined(block, page->xblock_size - MI_PADDING_SIZE);
mi_track_mem_defined(block, page->block_size - MI_PADDING_SIZE);
}
else {
_mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE);
_mi_memzero_aligned(block, page->block_size - MI_PADDING_SIZE);
}
}
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
if (!zero && !mi_page_is_huge(page)) {
memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page));
}
#elif (MI_SECURE!=0)
#elif (MI_SECURE!=0)
if (!zero) { block->next = 0; } // don't leak internal data
#endif
#endif
#if (MI_STAT>0)
#if (MI_STAT>0)
const size_t bsize = mi_page_usable_block_size(page);
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
mi_heap_stat_increase(heap, normal, bsize);
mi_heap_stat_counter_increase(heap, normal_count, 1);
#if (MI_STAT>1)
#if (MI_STAT>1)
const size_t bin = _mi_bin(bsize);
mi_heap_stat_increase(heap, normal_bins[bin], 1);
#endif
#endif
}
#endif
#endif
mi_padding_init(page, block, size - MI_PADDING_SIZE);
return block;
}
// extra entries for improved efficiency in `alloc-aligned.c`.
extern void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
return _mi_page_malloc_zero(heap,page,size,false);
}
extern void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
return _mi_page_malloc_zero(heap,page,size,true);
}
static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
mi_assert(heap != NULL);
#if MI_DEBUG
@ -119,9 +149,11 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
#if (MI_PADDING)
if (size == 0) { size = sizeof(void*); }
#endif
mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
void* const p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero);
void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero);
mi_track_malloc(p,size,zero);
#if MI_STAT>1
if (p != NULL) {
if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); }
@ -196,479 +228,6 @@ mi_decl_nodiscard mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept
return mi_heap_zalloc(mi_prim_get_default_heap(),size);
}
// ------------------------------------------------------
// Check for double free in secure and debug mode
// This is somewhat expensive so only enabled for secure mode 4
// ------------------------------------------------------
#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0))
// linear check if the free list contains a specific element
static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) {
while (list != NULL) {
if (elem==list) return true;
list = mi_block_next(page, list);
}
return false;
}
static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) {
// The decoded value is in the same page (or NULL).
// Walk the free lists to verify positively if it is already freed
if (mi_list_contains(page, page->free, block) ||
mi_list_contains(page, page->local_free, block) ||
mi_list_contains(page, mi_page_thread_free(page), block))
{
_mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page));
return true;
}
return false;
}
#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); }
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
bool is_double_free = false;
mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field
if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer?
(n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL?
{
// Suspicious: the decoded value in the block is in the same page (or NULL) -- maybe a double free?
// (continue in separate function to improve code generation)
is_double_free = mi_check_is_double_freex(page, block);
}
return is_double_free;
}
#else
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(page);
MI_UNUSED(block);
return false;
}
#endif
// ---------------------------------------------------------------------------
// Check for heap block overflow by setting up padding at the end of the block
// ---------------------------------------------------------------------------
#if MI_PADDING // && !MI_TRACK_ENABLED
static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) {
*bsize = mi_page_usable_block_size(page);
const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize);
mi_track_mem_defined(padding,sizeof(mi_padding_t));
*delta = padding->delta;
uint32_t canary = padding->canary;
uintptr_t keys[2];
keys[0] = page->keys[0];
keys[1] = page->keys[1];
bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize);
mi_track_mem_noaccess(padding,sizeof(mi_padding_t));
return ok;
}
// Return the exact usable size of a block.
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
size_t bsize;
size_t delta;
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
mi_assert_internal(ok); mi_assert_internal(delta <= bsize);
return (ok ? bsize - delta : 0);
}
// When a non-thread-local block is freed, it becomes part of the thread delayed free
// list that is freed later by the owning heap. If the exact usable size is too small to
// contain the pointer for the delayed list, then shrink the padding (by decreasing delta)
// so it will later not trigger an overflow error in `mi_free_block`.
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
size_t bsize;
size_t delta;
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
mi_assert_internal(ok);
if (!ok || (bsize - delta) >= min_size) return; // usually already enough space
mi_assert_internal(bsize >= min_size);
if (bsize < min_size) return; // should never happen
size_t new_delta = (bsize - min_size);
mi_assert_internal(new_delta < bsize);
mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize);
mi_track_mem_defined(padding,sizeof(mi_padding_t));
padding->delta = (uint32_t)new_delta;
mi_track_mem_noaccess(padding,sizeof(mi_padding_t));
}
#else
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(block);
return mi_page_usable_block_size(page);
}
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
MI_UNUSED(page);
MI_UNUSED(block);
MI_UNUSED(min_size);
}
#endif
#if MI_PADDING && MI_PADDING_CHECK
static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) {
size_t bsize;
size_t delta;
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
*size = *wrong = bsize;
if (!ok) return false;
mi_assert_internal(bsize >= delta);
*size = bsize - delta;
if (!mi_page_is_huge(page)) {
uint8_t* fill = (uint8_t*)block + bsize - delta;
const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes
mi_track_mem_defined(fill, maxpad);
for (size_t i = 0; i < maxpad; i++) {
if (fill[i] != MI_DEBUG_PADDING) {
*wrong = bsize - delta + i;
ok = false;
break;
}
}
mi_track_mem_noaccess(fill, maxpad);
}
return ok;
}
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
size_t size;
size_t wrong;
if (!mi_verify_padding(page,block,&size,&wrong)) {
_mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong );
}
}
#else
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(page);
MI_UNUSED(block);
}
#endif
// only maintain stats for smaller objects if requested
#if (MI_STAT>0)
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
#if (MI_STAT < 2)
MI_UNUSED(block);
#endif
mi_heap_t* const heap = mi_heap_get_default();
const size_t bsize = mi_page_usable_block_size(page);
#if (MI_STAT>1)
const size_t usize = mi_page_usable_size_of(page, block);
mi_heap_stat_decrease(heap, malloc, usize);
#endif
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
mi_heap_stat_decrease(heap, normal, bsize);
#if (MI_STAT > 1)
mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1);
#endif
}
#if !MI_HUGE_PAGE_ABANDON
else {
const size_t bpsize = mi_page_block_size(page);
if (bpsize <= MI_HUGE_OBJ_SIZE_MAX) {
mi_heap_stat_decrease(heap, huge, bpsize);
}
else {
mi_heap_stat_decrease(heap, giant, bpsize);
}
}
#endif
}
#else
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(page); MI_UNUSED(block);
}
#endif
#if MI_HUGE_PAGE_ABANDON
#if (MI_STAT>0)
// maintain stats for huge objects
static void mi_stat_huge_free(const mi_page_t* page) {
mi_heap_t* const heap = mi_heap_get_default();
const size_t bsize = mi_page_block_size(page); // to match stats in `page.c:mi_page_huge_alloc`
if (bsize <= MI_HUGE_OBJ_SIZE_MAX) {
mi_heap_stat_decrease(heap, huge, bsize);
}
else {
mi_heap_stat_decrease(heap, giant, bsize);
}
}
#else
static void mi_stat_huge_free(const mi_page_t* page) {
MI_UNUSED(page);
}
#endif
#endif
// ------------------------------------------------------
// Free
// ------------------------------------------------------
// multi-threaded free (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
// The padding check may access the non-thread-owned page for the key values.
// that is safe as these are constant and the page won't be freed (as the block is not freed yet).
mi_check_padding(page, block);
_mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
mi_segment_t* const segment = _mi_page_segment(page);
if (segment->page_kind == MI_PAGE_HUGE) {
#if MI_HUGE_PAGE_ABANDON
// huge page segments are always abandoned and can be freed immediately
mi_stat_huge_free(page);
_mi_segment_huge_page_free(segment, page, block);
return;
#else
// huge pages are special as they occupy the entire segment
// as these are large we reset the memory occupied by the page so it is available to other threads
// (as the owning thread needs to actually free the memory later).
_mi_segment_huge_page_reset(segment, page, block);
#endif
}
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
memset(block, MI_DEBUG_FREED, mi_usable_size(block));
#endif
// Try to put the block on either the page-local thread free list, or the heap delayed free list.
mi_thread_free_t tfreex;
bool use_delayed;
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
if mi_unlikely(use_delayed) {
// unlikely: this only happens on the first concurrent free in a page that is in the full list
tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING);
}
else {
// usual: directly add to page thread_free list
mi_block_set_next(page, block, mi_tf_block(tfree));
tfreex = mi_tf_set_block(tfree,block);
}
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
if mi_unlikely(use_delayed) {
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page);
mi_assert_internal(heap != NULL);
if (heap != NULL) {
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
do {
mi_block_set_nextx(heap,block,dfree, heap->keys);
} while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
}
// and reset the MI_DELAYED_FREEING flag
tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
tfreex = tfree;
mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
}
}
// regular free
static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block)
{
// and push it on the free list
//const size_t bsize = mi_page_block_size(page);
if mi_likely(local) {
// owning thread can free a block directly
if mi_unlikely(mi_check_is_double_free(page, block)) return;
mi_check_padding(page, block);
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
#endif
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
page->used--;
if mi_unlikely(mi_page_all_free(page)) {
_mi_page_retire(page);
}
else if mi_unlikely(mi_page_is_in_full(page)) {
_mi_page_unfull(page);
}
}
else {
_mi_free_block_mt(page,block);
}
}
// Adjust a block that was allocated aligned, to the actual start of the block in the page.
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) {
mi_assert_internal(page!=NULL && p!=NULL);
const size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL);
const size_t adjust = (diff % mi_page_block_size(page));
return (mi_block_t*)((uintptr_t)p - adjust);
}
void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page,block));
_mi_free_block(page, is_local, block);
}
// Get the segment data belonging to a pointer
// This is just a single `and` in assembly but does further checks in debug mode
// (and secure mode) if this was a valid pointer.
static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
{
MI_UNUSED(msg);
mi_assert(p != NULL);
#if (MI_DEBUG>0)
if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) {
_mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
return NULL;
}
#endif
mi_segment_t* const segment = _mi_ptr_segment(p);
mi_assert_internal(segment != NULL);
#if (MI_DEBUG>0)
if mi_unlikely(!mi_is_in_heap_region(p)) {
_mi_trace_message("%s: pointer might not point to a valid heap region: %p\n" "(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) {
_mi_trace_message("(yes, the previous pointer %p was valid after all)\n", p);
}
}
#endif
#if (MI_DEBUG>0 || MI_SECURE>=4)
if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) {
_mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
return NULL;
}
#endif
return segment;
}
// Free a block
// fast path written carefully to prevent spilling on the stack
void mi_free(void* p) mi_attr_noexcept
{
if mi_unlikely(p == NULL) return;
mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
const bool is_local= (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
mi_page_t* const page = _mi_segment_page_of(segment, p);
if mi_likely(is_local) { // thread-local free?
if mi_likely(page->flags.full_aligned == 0) // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
{
mi_block_t* const block = (mi_block_t*)p;
if mi_unlikely(mi_check_is_double_free(page, block)) return;
mi_check_padding(page, block);
mi_stat_free(page, block);
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
#endif
mi_track_free_size(p, mi_page_usable_size_of(page,block)); // faster than mi_usable_size as we already know the page and that p is unaligned
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page))
_mi_page_retire(page);
}
}
else {
// page is full or contains (inner) aligned blocks; use generic path
_mi_free_generic(segment, page, true, p);
}
}
else {
// not thread-local; use generic path
_mi_free_generic(segment, page, false, p);
}
}
// return true if successful
bool _mi_free_delayed_block(mi_block_t* block) {
// get segment and page
const mi_segment_t* const segment = _mi_ptr_segment(block);
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(_mi_thread_id() == segment->thread_id);
mi_page_t* const page = _mi_segment_page_of(segment, block);
// Clear the no-delayed flag so delayed freeing is used again for this page.
// This must be done before collecting the free lists on this page -- otherwise
// some blocks may end up in the page `thread_free` list with no blocks in the
// heap `thread_delayed_free` list which may cause the page to be never freed!
// (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`)
if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) {
return false;
}
// collect all other non-local frees to ensure up-to-date `used` count
_mi_page_free_collect(page, false);
// and free the block (possibly freeing the page as well since used is updated)
_mi_free_block(page, true, block);
return true;
}
// Bytes available in a block
mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept {
const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p);
const size_t size = mi_page_usable_size_of(page, block);
const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block;
mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
return (size - adjust);
}
static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
if (p == NULL) return 0;
const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
const mi_page_t* const page = _mi_segment_page_of(segment, p);
if mi_likely(!mi_page_has_aligned(page)) {
const mi_block_t* block = (const mi_block_t*)p;
return mi_page_usable_size_of(page, block);
}
else {
// split out to separate routine for improved code generation
return mi_page_usable_aligned_size_of(segment, page, p);
}
}
mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept {
return _mi_usable_size(p, "mi_usable_size");
}
// ------------------------------------------------------
// Allocation extensions
// ------------------------------------------------------
void mi_free_size(void* p, size_t size) mi_attr_noexcept {
MI_UNUSED_RELEASE(size);
mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size"));
mi_free(p);
}
void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept {
MI_UNUSED_RELEASE(alignment);
mi_assert(((uintptr_t)p % alignment) == 0);
mi_free_size(p,size);
}
void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept {
MI_UNUSED_RELEASE(alignment);
mi_assert(((uintptr_t)p % alignment) == 0);
mi_free(p);
}
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
size_t total;
if (mi_count_size_overflow(count,size,&total)) return NULL;
@ -713,7 +272,7 @@ void* mi_expand(void* p, size_t newsize) mi_attr_noexcept
// shrink padding
mi_page_t* page = _mi_segment_page_of(segment, p);
mi_block_t* block = _mi_page_ptr_unalign(segment, page, p);
mi_block_t* block = _mi_page_ptr_unalign(page, p);
mi_padding_init(page, block, newsize);
mi_track_resize(p, size, newsize);
@ -726,7 +285,7 @@ static void* mi_heap_try_remap_zero(mi_heap_t* heap, mi_segment_t* segment, void
static void mi_padding_init_ptr(void* p, size_t size) {
mi_segment_t* segment = mi_checked_ptr_segment(p, "_mi_padding_init_ptr");
mi_page_t* page = _mi_segment_page_of(segment, p);
mi_block_t* block = _mi_page_ptr_unalign(segment, page, p);
mi_block_t* block = _mi_page_ptr_unalign(page, p);
mi_padding_init(page, block, size);
}
@ -881,7 +440,7 @@ static void* mi_heap_try_expand_zero(mi_heap_t* heap, mi_segment_t* segment, voi
const size_t padsize = newsize + MI_PADDING_SIZE;
mi_assert_internal(segment != NULL);
mi_page_t* page = _mi_segment_page_of(segment, p);
mi_block_t* block = _mi_page_ptr_unalign(segment, page, p);
mi_block_t* block = _mi_page_ptr_unalign(page, p);
// try to use OS expand
mi_assert_internal((void*)block == p);
@ -961,7 +520,7 @@ static void* mi_heap_try_remap_zero(mi_heap_t* heap, mi_segment_t* segment, void
const size_t padsize = newsize + MI_PADDING_SIZE;
mi_assert_internal(segment != NULL);
mi_page_t* page = _mi_segment_page_of(segment, p);
mi_block_t* block = _mi_page_ptr_unalign(segment, page, p);
mi_block_t* block = _mi_page_ptr_unalign(page, p);
const size_t bsize = mi_page_usable_block_size(page);
if (bsize >= padsize && 9*(bsize/10) <= padsize) { // if smaller and not more than 10% waste, keep it
_mi_verbose_message("remapping in the same block (address: %p from %zu bytes to %zu bytes)\n", p, mi_usable_size(p), newsize);
@ -1076,7 +635,8 @@ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name)
char* rname = realpath(fname, NULL);
if (rname == NULL) return NULL;
char* result = mi_heap_strdup(heap, rname);
free(rname); // use regular free! (which may be redirected to our free but that's ok)
mi_cfree(rname); // use checked free (which may be redirected to our free but that's ok)
// note: with ASAN realpath is intercepted and mi_cfree may leak the returned pointer :-(
return result;
}
/*
@ -1120,9 +680,13 @@ static bool mi_try_new_handler(bool nothrow) {
#endif
if (h==NULL) {
_mi_error_message(ENOMEM, "out of memory in 'new'");
#if defined(_CPPUNWIND) || defined(__cpp_exceptions) // exceptions are not always enabled
if (!nothrow) {
throw std::bad_alloc();
}
#else
MI_UNUSED(nothrow);
#endif
return false;
}
else {

View file

@ -13,7 +13,7 @@ threads and need to be accessed using atomic operations.
Arenas are used for huge OS page (1GiB) reservations or for reserving
OS memory upfront which can improve performance or is sometimes needed
on embedded devices. We can also employ this with WASI or `sbrk` systems
on embedded devices. We can also employ this with WASI or `sbrk` systems
to reserve large arenas upfront and be able to reuse the memory more effectively.
The arena allocation needs to be thread safe and we use an atomic bitmap to allocate.
@ -48,13 +48,14 @@ typedef struct mi_arena_s {
size_t meta_size; // size of the arena structure itself (including its bitmaps)
mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation)
int numa_node; // associated NUMA node
bool exclusive; // only allow allocations if specifically for this arena
bool exclusive; // only allow allocations if specifically for this arena
bool is_large; // memory area consists of large- or huge OS pages (always committed)
_Atomic(size_t) search_idx; // optimization to start the search for free blocks
_Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
_Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted)
mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here)
mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)
} mi_arena_t;
@ -94,13 +95,13 @@ bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_i
return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id);
}
else {
return mi_arena_id_is_suitable(0, false, request_arena_id);
return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id);
}
}
/* -----------------------------------------------------------
Arena allocations get a (currently) 16-bit memory id where the
Arena allocations get a (currently) 16-bit memory id where the
lower 8 bits are the arena id, and the upper bits the block index.
----------------------------------------------------------- */
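// Illustration only (the actual encoding/decoding is done by the memid helpers, not shown in this hunk):
// with the layout described above, arena id 3 with block index 42 would pack roughly as
//   (42 << 8) | (3 & 0xFF) == 0x2A03
// and decode by masking the low 8 bits for the arena id and shifting right by 8 for the block index.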
@ -162,6 +163,7 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m
// success
*memid = _mi_memid_create(MI_MEM_STATIC);
memid->initially_zero = true;
const size_t start = _mi_align_up(oldtop, alignment);
uint8_t* const p = &mi_arena_static[start];
_mi_memzero(p, size);
@ -179,8 +181,10 @@ static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* st
p = _mi_os_alloc(size, memid, stats);
if (p == NULL) return NULL;
// zero the OS memory if needed
if (!memid->initially_zero) {
_mi_memzero_aligned(p, size);
memid->initially_zero = true;
}
return p;
}
@ -204,11 +208,11 @@ static void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) {
----------------------------------------------------------- */
// claim the `blocks_inuse` bits
static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
{
size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter
if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) {
mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around
if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx, stats)) {
mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around
return true;
};
return false;
@ -226,9 +230,9 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
mi_assert_internal(mi_arena_id_index(arena->id) == arena_index);
mi_bitmap_index_t bitmap_index;
if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index)) return NULL;
if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index, tld->stats)) return NULL;
// claimed it!
// claimed it!
void* p = mi_arena_block_start(arena, bitmap_index);
*memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index);
memid->is_pinned = arena->memid.is_pinned;
@ -268,21 +272,21 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
// no need to commit, but check if already fully committed
memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
}
return p;
}
// allocate in a specific arena
static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment,
bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment,
bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
{
MI_UNUSED_RELEASE(alignment);
mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
const size_t bcount = mi_block_count_of_size(size);
const size_t bcount = mi_block_count_of_size(size);
const size_t arena_index = mi_arena_id_index(arena_id);
mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count));
mi_assert_internal(size <= mi_arena_block_size(bcount));
// Check arena suitability
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]);
if (arena == NULL) return NULL;
@ -302,7 +306,7 @@ static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_no
// allocate from an arena with fallback to the OS
static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment,
static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment,
bool commit, bool allow_large,
mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
{
@ -310,9 +314,9 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, siz
mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
if mi_likely(max_arena == 0) return NULL;
if (req_arena_id != _mi_arena_id_none()) {
// try a specific arena if requested
// try a specific arena if requested
if (mi_arena_id_index(req_arena_id) < max_arena) {
void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
if (p != NULL) return p;
@ -320,7 +324,7 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, siz
}
else {
// try numa affine allocation
for (size_t i = 0; i < max_arena; i++) {
for (size_t i = 0; i < max_arena; i++) {
void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
if (p != NULL) return p;
}
@ -348,22 +352,22 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re
size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve);
if (arena_reserve == 0) return false;
if (!_mi_os_has_virtual_reserve()) {
arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for some embedded systems for example)
if (!_mi_os_has_virtual_reserve()) {
arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for WASM for example)
}
arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE);
if (arena_count >= 8 && arena_count <= 128) {
arena_reserve = ((size_t)1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially
}
}
if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size
// commit eagerly?
bool arena_commit = false;
if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); }
else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; }
return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive */, arena_id) == 0);
}
return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id) == 0);
}
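For clarity, the exponential scaling above works out as follows (a worked example of the `1<<(arena_count/8)` factor; the base value comes from `mi_option_arena_reserve`):
// arena_count   0..7   : reserve * 1       (no scaling applied)
// arena_count   8..15  : reserve * 2       (1 << (8/8))
// arena_count  16..23  : reserve * 4       (1 << (16/8))
// ...
// arena_count  128     : reserve * 65536   (1 << (128/8))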
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large,
@ -376,35 +380,37 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
const int numa_node = _mi_os_numa_node(tld); // current numa node
// try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
if (p != NULL) return p;
if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) || req_arena_id != _mi_arena_id_none()) { // is arena allocation allowed?
if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
if (p != NULL) return p;
// otherwise, try to first eagerly reserve a new arena
if (req_arena_id == _mi_arena_id_none()) {
mi_arena_id_t arena_id = 0;
if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) {
// and try to allocate in there
mi_assert_internal(req_arena_id == _mi_arena_id_none());
p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
if (p != NULL) return p;
// otherwise, try to first eagerly reserve a new arena
if (req_arena_id == _mi_arena_id_none()) {
mi_arena_id_t arena_id = 0;
if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) {
// and try to allocate in there
mi_assert_internal(req_arena_id == _mi_arena_id_none());
p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld);
if (p != NULL) return p;
}
}
}
}
// if we cannot use OS allocation, return NULL
if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) {
if (mi_option_is_enabled(mi_option_disallow_os_alloc) || req_arena_id != _mi_arena_id_none()) {
errno = ENOMEM;
return NULL;
}
// finally, fall back to the OS
if (align_offset > 0) {
return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats);
}
else {
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats);
}
}
}
void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
@ -440,22 +446,22 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks,
mi_assert_internal(arena->blocks_purge != NULL);
mi_assert_internal(!arena->memid.is_pinned);
const size_t size = mi_arena_block_size(blocks);
void* const p = mi_arena_block_start(arena, bitmap_idx);
void* const p = mi_arena_block_start(arena, bitmap_idx);
bool needs_recommit;
if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) {
// all blocks are committed, we can purge freely
needs_recommit = _mi_os_purge(p, size, stats);
}
else {
// some blocks are not committed -- this can happen when a partially committed block is freed
// some blocks are not committed -- this can happen when a partially committed block is freed
// in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge
// we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
// we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
// and also undo the decommit stats (as it was already adjusted)
mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats);
_mi_stat_increase(&stats->committed, size);
if (needs_recommit) { _mi_stat_increase(&_mi_stats_main.committed, size); }
}
// clear the purged blocks
_mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx);
// update committed bitmap
@ -473,13 +479,13 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t
if (_mi_preloading() || delay == 0) {
// decommit directly
mi_arena_purge(arena, bitmap_idx, blocks, stats);
mi_arena_purge(arena, bitmap_idx, blocks, stats);
}
else {
// schedule decommit
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
if (expire != 0) {
mi_atomic_addi64_acq_rel(&arena->purge_expire, delay/10); // add smallish extra delay
mi_atomic_addi64_acq_rel(&arena->purge_expire, (mi_msecs_t)(delay/10)); // add smallish extra delay
}
else {
mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay);
@ -515,7 +521,7 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx,
}
// returns true if anything was purged
static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats)
static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats)
{
if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false;
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
@ -523,11 +529,11 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi
if (!force && expire > now) return false;
// reset expire (if not already set concurrently)
mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, 0);
mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, (mi_msecs_t)0);
// potential purges scheduled, walk through the bitmap
bool any_purged = false;
bool full_purge = true;
bool full_purge = true;
for (size_t i = 0; i < arena->field_count; i++) {
size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]);
if (purge != 0) {
@ -578,7 +584,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats )
// allow only one thread to purge at a time
static mi_atomic_guard_t purge_guard;
mi_atomic_guard(&purge_guard)
mi_atomic_guard(&purge_guard)
{
mi_msecs_t now = _mi_clock_now();
size_t max_purge_count = (visit_all ? max_arena : 1);
@ -591,7 +597,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats )
}
}
}
}
}
}
@ -605,12 +611,12 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
if (p==NULL) return;
if (size==0) return;
const bool all_committed = (committed_size == size);
if (mi_memkind_is_os(memid.memkind)) {
// was a direct OS allocation, pass through
if (!all_committed && committed_size > 0) {
// if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size)
_mi_stat_decrease(&stats->committed, committed_size);
_mi_stat_decrease(&_mi_stats_main.committed, committed_size);
}
_mi_os_free(p, size, memid, stats);
}
@ -623,15 +629,15 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t,&mi_arenas[arena_idx]);
mi_assert_internal(arena != NULL);
const size_t blocks = mi_block_count_of_size(size);
// checks
if (arena == NULL) {
_mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
_mi_error_message(EINVAL, "trying to free from an invalid arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
return;
}
mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx));
if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) {
_mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
_mi_error_message(EINVAL, "trying to free from an invalid arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
return;
}
@ -645,7 +651,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
else {
mi_assert_internal(arena->blocks_committed != NULL);
mi_assert_internal(arena->blocks_purge != NULL);
if (!all_committed) {
// mark the entire range as no longer committed (so we recommit the full range when re-using)
_mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
@ -653,16 +659,16 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
if (committed_size > 0) {
// if partially committed, adjust the committed stats (as it will be recommitted when re-using)
// in the delayed purge, we now need to not count a decommit if the range is not marked as committed.
_mi_stat_decrease(&stats->committed, committed_size);
_mi_stat_decrease(&_mi_stats_main.committed, committed_size);
}
// note: if not all committed, it may be that the purge will reset/decommit the entire range
// that contains already decommitted parts. Since purge consistently uses reset or decommit that
// works (as we should never reset decommitted parts).
}
// (delay) purge the entire range
mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats);
mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats);
}
// and make it available to others again
bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx);
if (!all_inuse) {
@ -687,9 +693,9 @@ static void mi_arenas_unsafe_destroy(void) {
for (size_t i = 0; i < max_arena; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
if (arena != NULL) {
if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) {
if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) {
mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
_mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main);
_mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main);
}
else {
new_max_arena = i;
@ -704,15 +710,15 @@ static void mi_arenas_unsafe_destroy(void) {
}
// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired
void _mi_arena_collect(bool force_purge, mi_stats_t* stats) {
mi_arenas_try_purge(force_purge, true /* visit all */, stats);
void _mi_arenas_collect(bool force_purge, mi_stats_t* stats) {
mi_arenas_try_purge(force_purge, force_purge /* visit all? */, stats);
}
// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
// for dynamic libraries that are unloaded and need to release all their allocated memory.
void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) {
mi_arenas_unsafe_destroy();
_mi_arena_collect(true /* force purge */, stats); // purge non-owned arenas
_mi_arenas_collect(true /* force purge */, stats); // purge non-owned arenas
}
// Is a pointer inside any of our arenas?
@ -720,19 +726,151 @@ bool _mi_arena_contains(const void* p) {
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
for (size_t i = 0; i < max_arena; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
return true;
if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
return true;
}
}
return false;
}
/* -----------------------------------------------------------
Abandoned blocks/segments.
This is used to atomically abandon/reclaim segments
(and crosses the arena API but it is convenient to have here).
Abandoned segments still have live blocks; they get reclaimed
when a thread frees a block in it, or when a thread needs a fresh
segment; these threads scan the abandoned segments through
the arena bitmaps.
----------------------------------------------------------- */
// Maintain a count of all abandoned segments
static mi_decl_cache_align _Atomic(size_t) abandoned_count;
size_t _mi_arena_segment_abandoned_count(void) {
return mi_atomic_load_relaxed(&abandoned_count);
}
// reclaim a specific abandoned segment; `true` on success.
// sets the thread_id.
bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment )
{
if (segment->memid.memkind != MI_MEM_ARENA) {
// not in an arena, consider it un-abandoned now.
// but we still need to claim it atomically -- we use the thread_id for that.
size_t expected = 0;
if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected, _mi_thread_id())) {
mi_atomic_decrement_relaxed(&abandoned_count);
return true;
}
else {
return false;
}
}
// arena segment: use the blocks_abandoned bitmap.
size_t arena_idx;
size_t bitmap_idx;
mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx);
mi_assert_internal(arena_idx < MI_MAX_ARENAS);
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
mi_assert_internal(arena != NULL);
bool was_marked = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx);
if (was_marked) {
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
mi_atomic_decrement_relaxed(&abandoned_count);
mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
}
// mi_assert_internal(was_marked);
mi_assert_internal(!was_marked || _mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
return was_marked;
}
// mark a specific segment as abandoned
// clears the thread_id.
void _mi_arena_segment_mark_abandoned(mi_segment_t* segment)
{
mi_atomic_store_release(&segment->thread_id, 0);
mi_assert_internal(segment->used == segment->abandoned);
if (segment->memid.memkind != MI_MEM_ARENA) {
// not in an arena; count it as abandoned and return
mi_atomic_increment_relaxed(&abandoned_count);
return;
}
size_t arena_idx;
size_t bitmap_idx;
mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx);
mi_assert_internal(arena_idx < MI_MAX_ARENAS);
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
mi_assert_internal(arena != NULL);
const bool was_unmarked = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
if (was_unmarked) { mi_atomic_increment_relaxed(&abandoned_count); }
mi_assert_internal(was_unmarked);
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
}
// start a cursor at a randomized arena
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current) {
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena));
current->count = 0;
current->bitmap_idx = 0;
}
// reclaim abandoned segments
// this does not set the thread id (so it appears as still abandoned)
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous )
{
const int max_arena = (int)mi_atomic_load_relaxed(&mi_arena_count);
if (max_arena <= 0 || mi_atomic_load_relaxed(&abandoned_count) == 0) return NULL;
int count = previous->count;
size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx);
size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx) + 1;
// visit arenas (starting from the previous cursor position)
for (; count < max_arena; count++, field_idx = 0, bit_idx = 0) {
mi_arena_id_t arena_idx = previous->start + count;
if (arena_idx >= max_arena) { arena_idx = arena_idx % max_arena; } // wrap around
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
if (arena != NULL) {
// visit the abandoned fields (starting at the previous field index)
for ( ; field_idx < arena->field_count; field_idx++, bit_idx = 0) {
size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]);
if mi_unlikely(field != 0) { // skip zero fields quickly
// visit each set bit in the field (todo: maybe use `ctz` here?)
for ( ; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) {
// pre-check if the bit is set
size_t mask = ((size_t)1 << bit_idx);
if mi_unlikely((field & mask) == mask) {
mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx);
// try to reclaim it atomically
if (_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) {
mi_atomic_decrement_relaxed(&abandoned_count);
previous->bitmap_idx = bitmap_idx;
previous->count = count;
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
return segment;
}
}
}
}
}
}
}
// no more found
previous->bitmap_idx = 0;
previous->count = 0;
return NULL;
}
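A minimal usage sketch of the new cursor API follows (illustrative only, not part of this change; the real consumers are the segment reclaim paths). It walks the currently abandoned segments once and simply puts each one back:
static void mi_scan_abandoned_sketch(mi_heap_t* heap) {    // hypothetical helper
  mi_arena_field_cursor_t cursor;
  _mi_arena_field_cursor_init(heap, &cursor);              // start at a randomized arena
  mi_segment_t* segment;
  while ((segment = _mi_arena_segment_clear_abandoned_next(&cursor)) != NULL) {
    // the segment is no longer marked abandoned in the arena bitmap and its
    // thread_id is still 0; a real caller would try to reclaim it into its own
    // heap, or otherwise put it back as done here.
    _mi_arena_segment_mark_abandoned(segment);
  }
}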
/* -----------------------------------------------------------
Add an arena.
----------------------------------------------------------- */
static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) {
static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) {
mi_assert_internal(arena != NULL);
mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0);
mi_assert_internal(arena->block_count > 0);
@ -743,6 +881,7 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) {
mi_atomic_decrement_acq_rel(&mi_arena_count);
return false;
}
_mi_stat_counter_increase(&stats->arena_count,1);
arena->id = mi_arena_id_create(i);
mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena);
if (arena_id != NULL) { *arena_id = arena->id; }
@ -760,13 +899,13 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
const size_t bcount = size / MI_ARENA_BLOCK_SIZE;
const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
const size_t bitmaps = (memid.is_pinned ? 2 : 4);
const size_t bitmaps = (memid.is_pinned ? 3 : 5);
const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
mi_memid_t meta_memid;
mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
if (arena == NULL) return false;
// already zero'd due to os_alloc
// already zero'd due to zalloc
// _mi_memzero(arena, asize);
arena->id = _mi_arena_id_none();
arena->memid = memid;
@ -780,14 +919,16 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
arena->is_large = is_large;
arena->purge_expire = 0;
arena->search_idx = 0;
arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap
// consecutive bitmaps
arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
arena->blocks_abandoned = &arena->blocks_inuse[2 * fields]; // just after dirty bitmap
arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after abandoned bitmap
arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[4*fields]); // just after committed bitmap
// initialize committed bitmap?
if (arena->blocks_committed != NULL && arena->memid.initially_committed) {
memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning
}
// and claim leftover blocks if needed (so we never allocate there)
ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
mi_assert_internal(post >= 0);
@ -796,7 +937,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
_mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
}
return mi_arena_add(arena, arena_id);
return mi_arena_add(arena, arena_id, &_mi_stats_main);
}
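For reference, the consecutive bitmap layout set up above inside the single `asize` allocation is (derived from the assignments, shown here as a sketch):
// blocks_inuse     : fields[0          .. 1*fields-1]
// blocks_dirty     : fields[1*fields   .. 2*fields-1]
// blocks_abandoned : fields[2*fields   .. 3*fields-1]
// blocks_committed : fields[3*fields   .. 4*fields-1]   (NULL when pinned)
// blocks_purge     : fields[4*fields   .. 5*fields-1]   (NULL when pinned)
// hence `bitmaps` is 3 for pinned memory and 5 otherwise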
@ -841,32 +982,55 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noe
Debugging
----------------------------------------------------------- */
static size_t mi_debug_show_bitmap(const char* prefix, mi_bitmap_field_t* fields, size_t field_count ) {
static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_t block_count, mi_bitmap_field_t* fields, size_t field_count ) {
_mi_verbose_message("%s%s:\n", prefix, header);
size_t bcount = 0;
size_t inuse_count = 0;
for (size_t i = 0; i < field_count; i++) {
char buf[MI_BITMAP_FIELD_BITS + 1];
uintptr_t field = mi_atomic_load_relaxed(&fields[i]);
for (size_t bit = 0; bit < MI_BITMAP_FIELD_BITS; bit++) {
bool inuse = ((((uintptr_t)1 << bit) & field) != 0);
if (inuse) inuse_count++;
buf[MI_BITMAP_FIELD_BITS - 1 - bit] = (inuse ? 'x' : '.');
for (size_t bit = 0; bit < MI_BITMAP_FIELD_BITS; bit++, bcount++) {
if (bcount < block_count) {
bool inuse = ((((uintptr_t)1 << bit) & field) != 0);
if (inuse) inuse_count++;
buf[bit] = (inuse ? 'x' : '.');
}
else {
buf[bit] = ' ';
}
}
buf[MI_BITMAP_FIELD_BITS] = 0;
_mi_verbose_message("%s%s\n", prefix, buf);
_mi_verbose_message("%s %s\n", prefix, buf);
}
_mi_verbose_message("%s total ('x'): %zu\n", prefix, inuse_count);
return inuse_count;
}
void mi_debug_show_arenas(void) mi_attr_noexcept {
void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge) mi_attr_noexcept {
size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count);
size_t inuse_total = 0;
size_t abandoned_total = 0;
size_t purge_total = 0;
for (size_t i = 0; i < max_arenas; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
if (arena == NULL) break;
size_t inuse_count = 0;
_mi_verbose_message("arena %zu: %zu blocks with %zu fields\n", i, arena->block_count, arena->field_count);
inuse_count += mi_debug_show_bitmap(" ", arena->blocks_inuse, arena->field_count);
_mi_verbose_message(" blocks in use ('x'): %zu\n", inuse_count);
_mi_verbose_message("arena %zu: %zu blocks of size %zuMiB (in %zu fields) %s\n", i, arena->block_count, MI_ARENA_BLOCK_SIZE / MI_MiB, arena->field_count, (arena->memid.is_pinned ? ", pinned" : ""));
if (show_inuse) {
inuse_total += mi_debug_show_bitmap(" ", "inuse blocks", arena->block_count, arena->blocks_inuse, arena->field_count);
}
if (arena->blocks_committed != NULL) {
mi_debug_show_bitmap(" ", "committed blocks", arena->block_count, arena->blocks_committed, arena->field_count);
}
if (show_abandoned) {
abandoned_total += mi_debug_show_bitmap(" ", "abandoned blocks", arena->block_count, arena->blocks_abandoned, arena->field_count);
}
if (show_purge && arena->blocks_purge != NULL) {
purge_total += mi_debug_show_bitmap(" ", "purgeable blocks", arena->block_count, arena->blocks_purge, arena->field_count);
}
}
if (show_inuse) _mi_verbose_message("total inuse blocks : %zu\n", inuse_total);
if (show_abandoned) _mi_verbose_message("total abandoned blocks: %zu\n", abandoned_total);
if (show_purge) _mi_verbose_message("total purgeable blocks: %zu\n", purge_total);
}
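A small usage sketch of the extended `mi_debug_show_arenas` signature (illustrative; output goes through `_mi_verbose_message`, so verbose messages must be enabled, e.g. with `MIMALLOC_VERBOSE=1` or as below):
void mi_show_arena_state_sketch(void) {                    // hypothetical helper
  mi_option_set(mi_option_verbose, 1);                     // make verbose output visible
  mi_debug_show_arenas(true  /* show_inuse */,
                       false /* show_abandoned */,
                       true  /* show_purge */);
}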

View file

@ -7,7 +7,7 @@ terms of the MIT license. A copy of the license can be found in the file
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represeted as an array of fields where each field is a machine word (`size_t`)
represented as an array of fields where each field is a machine word (`size_t`)
There are two APIs; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
@ -182,7 +182,7 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
// Try to atomically claim a sequence of `count` bits starting from the field
// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`)
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx)
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats)
{
mi_assert_internal(bitmap_idx != NULL);
@ -242,6 +242,7 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
// claimed!
mi_stat_counter_increase(stats->arena_crossover_count,1);
*bitmap_idx = mi_bitmap_index_create(idx, initial_idx);
return true;
@ -261,9 +262,10 @@ rollback:
newmap = (map & ~initial_mask);
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
}
mi_stat_counter_increase(stats->arena_rollback_count,1);
// retry? (we make a recursive call instead of goto to be able to use const declarations)
if (retries <= 2) {
return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx);
return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx, stats);
}
else {
return false;
@ -273,7 +275,7 @@ rollback:
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) {
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) {
mi_assert_internal(count > 0);
if (count <= 2) {
// we don't bother with crossover fields for small counts
@ -285,13 +287,15 @@ bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitm
for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) {
if (idx >= bitmap_fields) { idx = 0; } // wrap
// first try to claim inside a field
/*
if (count <= MI_BITMAP_FIELD_BITS) {
if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
return true;
}
}
*/
// if that fails, then try to claim across fields
if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) {
if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx, stats)) {
return true;
}
}

View file

@ -7,7 +7,7 @@ terms of the MIT license. A copy of the license can be found in the file
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represeted as an array of fields where each field is a machine word (`size_t`)
represented as an array of fields where each field is a machine word (`size_t`)
There are two APIs; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
@ -90,7 +90,7 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats);
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.

520
src/free.c Normal file
View file

@ -0,0 +1,520 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#if !defined(MI_IN_ALLOC_C)
#error "this file should be included from 'alloc.c' (so aliases can work from alloc-override)"
// add includes to help an IDE
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h" // _mi_prim_thread_id()
#endif
// forward declarations
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block);
static bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block);
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block);
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block);
// ------------------------------------------------------
// Free
// ------------------------------------------------------
// forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block);
// regular free of a (thread local) block pointer
// fast path written carefully to prevent spilling on the stack
static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool track_stats, bool check_full)
{
// checks
if mi_unlikely(mi_check_is_double_free(page, block)) return;
mi_check_padding(page, block);
if (track_stats) { mi_stat_free(page, block); }
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
#endif
if (track_stats) { mi_track_free_size(block, mi_page_usable_size_of(page, block)); } // faster than mi_usable_size as we already know the page and that p is unaligned
// actual free: push on the local free list
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
if mi_unlikely(--page->used == 0) {
_mi_page_retire(page);
}
else if mi_unlikely(check_full && mi_page_is_in_full(page)) {
_mi_page_unfull(page);
}
}
// Adjust a block that was allocated aligned, to the actual start of the block in the page.
// note: this can be called from `mi_free_generic_mt` where a non-owning thread accesses the
// `page_start` and `block_size` fields; however these are constant and the page won't be
// deallocated (as the block we are freeing keeps it alive) and thus safe to read concurrently.
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) {
mi_assert_internal(page!=NULL && p!=NULL);
size_t diff = (uint8_t*)p - page->page_start;
size_t adjust;
if mi_likely(page->block_size_shift != 0) {
adjust = diff & (((size_t)1 << page->block_size_shift) - 1);
}
else {
adjust = diff % mi_page_block_size(page);
}
return (mi_block_t*)((uintptr_t)p - adjust);
}
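A worked example with hypothetical values: assume the page's block size is 64 bytes (so `block_size_shift == 6`) and `p` points 24 bytes past the start of the fourth block, i.e. 216 bytes past `page_start`:
// diff   = 216
// adjust = diff & ((1 << 6) - 1)  = 216 & 63 = 24     (fast path)
//          diff % 64              = 24                (generic path, same result)
// result = (mi_block_t*)((uintptr_t)p - 24)           -> start of the containing block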
// free a local pointer (page parameter comes first for better codegen)
static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
MI_UNUSED(segment);
mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(page, p) : (mi_block_t*)p);
mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */);
}
// free a pointer owned by another thread (page parameter comes first for better codegen)
static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
mi_block_t* const block = _mi_page_ptr_unalign(page, p); // don't check `has_aligned` flag to avoid a race (issue #865)
mi_free_block_mt(page, segment, block);
}
// generic free (for runtime integration)
void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
if (is_local) mi_free_generic_local(page,segment,p);
else mi_free_generic_mt(page,segment,p);
}
// Get the segment data belonging to a pointer
// This is just a single `and` in release mode but does further checks in debug mode
// (and secure mode) to see if this was a valid pointer.
static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
{
MI_UNUSED(msg);
#if (MI_DEBUG>0)
if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) {
_mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
return NULL;
}
#endif
mi_segment_t* const segment = _mi_ptr_segment(p);
if mi_unlikely(segment==NULL) return segment;
#if (MI_DEBUG>0)
if mi_unlikely(!mi_is_in_heap_region(p)) {
_mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
"(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) {
_mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
}
}
#endif
#if (MI_DEBUG>0 || MI_SECURE>=4)
if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) {
_mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
return NULL;
}
#endif
return segment;
}
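The release-mode "single `and`" above refers to the segment mask applied by `_mi_ptr_segment`; as a conceptual sketch (assuming `MI_SEGMENT_ALIGN`-aligned segments, not the literal definition in `mimalloc/internal.h`) it amounts to:
static inline mi_segment_t* mi_ptr_segment_sketch(const void* p) {   // sketch only
  return (mi_segment_t*)((uintptr_t)p & ~((uintptr_t)MI_SEGMENT_ALIGN - 1));
}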
// Free a block
// Fast path written carefully to prevent register spilling on the stack
void mi_free(void* p) mi_attr_noexcept
{
mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
if mi_unlikely(segment==NULL) return;
const bool is_local = (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
mi_page_t* const page = _mi_segment_page_of(segment, p);
if mi_likely(is_local) { // thread-local free?
if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor does it have aligned blocks (aligned blocks need to be unaligned)
// thread-local, aligned, and not a full page
mi_block_t* const block = (mi_block_t*)p;
mi_free_block_local(page, block, true /* track stats */, false /* no need to check if the page is full */);
}
else {
// page is full or contains (inner) aligned blocks; use generic path
mi_free_generic_local(page, segment, p);
}
}
else {
// not thread-local; use generic path
mi_free_generic_mt(page, segment, p);
}
}
// return true if successful
bool _mi_free_delayed_block(mi_block_t* block) {
// get segment and page
mi_assert_internal(block!=NULL);
const mi_segment_t* const segment = _mi_ptr_segment(block);
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(_mi_thread_id() == segment->thread_id);
mi_page_t* const page = _mi_segment_page_of(segment, block);
// Clear the no-delayed flag so delayed freeing is used again for this page.
// This must be done before collecting the free lists on this page -- otherwise
// some blocks may end up in the page `thread_free` list with no blocks in the
// heap `thread_delayed_free` list which may cause the page to never be freed!
// (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`)
if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) {
return false;
}
// collect all other non-local frees (move from `thread_free` to `free`) to ensure up-to-date `used` count
_mi_page_free_collect(page, false);
// and free the block (possibly freeing the page as well since `used` is updated)
mi_free_block_local(page, block, false /* stats have already been adjusted */, true /* check for a full page */);
return true;
}
// ------------------------------------------------------
// Multi-threaded Free (`_mt`)
// ------------------------------------------------------
// Push a block that is owned by another thread on its page-local thread free
// list or its heap delayed free list. Such blocks are later collected by
// the owning thread in `_mi_free_delayed_block`.
static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block_t* block )
{
// Try to put the block on either the page-local thread free list,
// or the heap delayed free list (if this is the first non-local free in that page)
mi_thread_free_t tfreex;
bool use_delayed;
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
if mi_unlikely(use_delayed) {
// unlikely: this only happens on the first concurrent free in a page that is in the full list
tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING);
}
else {
// usual: directly add to page thread_free list
mi_block_set_next(page, block, mi_tf_block(tfree));
tfreex = mi_tf_set_block(tfree,block);
}
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
// If this was the first non-local free, we need to push it on the heap delayed free list instead
if mi_unlikely(use_delayed) {
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page);
mi_assert_internal(heap != NULL);
if (heap != NULL) {
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
do {
mi_block_set_nextx(heap,block,dfree, heap->keys);
} while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
}
// and reset the MI_DELAYED_FREEING flag
tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
tfreex = tfree;
mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
}
}
// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block)
{
// first see if the segment was abandoned and if we can reclaim it into our thread
if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) &&
#if MI_HUGE_PAGE_ABANDON
segment->page_kind != MI_PAGE_HUGE &&
#endif
mi_atomic_load_relaxed(&segment->thread_id) == 0)
{
// the segment is abandoned, try to reclaim it into our heap
if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) {
mi_assert_internal(_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
mi_free(block); // recursively free as now it will be a local free in our heap
return;
}
}
// The padding check may access the non-thread-owned page for the key values.
// that is safe as these are constant and the page won't be freed (as the block is not freed yet).
mi_check_padding(page, block);
// adjust stats (after padding check and potentially recursive `mi_free` above)
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page,block));
// for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
_mi_padding_shrink(page, block, sizeof(mi_block_t));
if (segment->page_kind == MI_PAGE_HUGE) {
#if MI_HUGE_PAGE_ABANDON
// huge page segments are always abandoned and can be freed immediately
_mi_segment_huge_page_free(segment, page, block);
return;
#else
// huge pages are special as they occupy the entire segment
// as these are large we reset the memory occupied by the page so it is available to other threads
// (as the owning thread needs to actually free the memory later).
_mi_segment_huge_page_reset(segment, page, block);
#endif
}
else {
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
memset(block, MI_DEBUG_FREED, mi_usable_size(block));
#endif
}
// and finally free the actual block by pushing it on the owning heap
// thread_delayed free list (or heap delayed free list)
mi_free_block_delayed_mt(page,block);
}
// ------------------------------------------------------
// Usable size
// ------------------------------------------------------
// Bytes available in a block
static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* page, const void* p) mi_attr_noexcept {
const mi_block_t* block = _mi_page_ptr_unalign(page, p);
const size_t size = mi_page_usable_size_of(page, block);
const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block;
mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
return (size - adjust);
}
static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
if mi_unlikely(segment==NULL) return 0;
const mi_page_t* const page = _mi_segment_page_of(segment, p);
if mi_likely(!mi_page_has_aligned(page)) {
const mi_block_t* block = (const mi_block_t*)p;
return mi_page_usable_size_of(page, block);
}
else {
// split out to separate routine for improved code generation
return mi_page_usable_aligned_size_of(page, p);
}
}
mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept {
return _mi_usable_size(p, "mi_usable_size");
}
// ------------------------------------------------------
// Free variants
// ------------------------------------------------------
void mi_free_size(void* p, size_t size) mi_attr_noexcept {
MI_UNUSED_RELEASE(size);
mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size"));
mi_free(p);
}
void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept {
MI_UNUSED_RELEASE(alignment);
mi_assert(((uintptr_t)p % alignment) == 0);
mi_free_size(p,size);
}
void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept {
MI_UNUSED_RELEASE(alignment);
mi_assert(((uintptr_t)p % alignment) == 0);
mi_free(p);
}
// ------------------------------------------------------
// Check for double free in secure and debug mode
// This is somewhat expensive so only enabled for secure mode 4
// ------------------------------------------------------
#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0))
// linear check if the free list contains a specific element
static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) {
while (list != NULL) {
if (elem==list) return true;
list = mi_block_next(page, list);
}
return false;
}
static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) {
// The decoded value is in the same page (or NULL).
// Walk the free lists to verify positively if it is already freed
if (mi_list_contains(page, page->free, block) ||
mi_list_contains(page, page->local_free, block) ||
mi_list_contains(page, mi_page_thread_free(page), block))
{
_mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page));
return true;
}
return false;
}
#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); }
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
bool is_double_free = false;
mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field
if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer?
(n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL?
{
// Suspicious: the decoded value in the block is in the same page (or NULL) -- maybe a double free?
// (continue in separate function to improve code generation)
is_double_free = mi_check_is_double_freex(page, block);
}
return is_double_free;
}
#else
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(page);
MI_UNUSED(block);
return false;
}
#endif
// ---------------------------------------------------------------------------
// Check for heap block overflow by setting up padding at the end of the block
// ---------------------------------------------------------------------------
#if MI_PADDING // && !MI_TRACK_ENABLED
static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) {
*bsize = mi_page_usable_block_size(page);
const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize);
mi_track_mem_defined(padding,sizeof(mi_padding_t));
*delta = padding->delta;
uint32_t canary = padding->canary;
uintptr_t keys[2];
keys[0] = page->keys[0];
keys[1] = page->keys[1];
bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize);
mi_track_mem_noaccess(padding,sizeof(mi_padding_t));
return ok;
}
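For orientation, the block layout implied by the decoding above (a sketch; `mi_padding_t` itself is defined in the mimalloc headers):
// [block, block + bsize - delta)          : user data (the exact usable size)
// [block + bsize - delta, block + bsize)  : MI_DEBUG_PADDING fill bytes (checked by mi_verify_padding when padding checks are enabled)
// [block + bsize, ...)                    : mi_padding_t { canary, delta }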
// Return the exact usable size of a block.
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
size_t bsize;
size_t delta;
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
mi_assert_internal(ok); mi_assert_internal(delta <= bsize);
return (ok ? bsize - delta : 0);
}
// When a non-thread-local block is freed, it becomes part of the thread delayed free
// list that is freed later by the owning heap. If the exact usable size is too small to
// contain the pointer for the delayed list, then shrink the padding (by decreasing delta)
// so it will later not trigger an overflow error in `mi_free_block`.
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
size_t bsize;
size_t delta;
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
mi_assert_internal(ok);
if (!ok || (bsize - delta) >= min_size) return; // usually already enough space
mi_assert_internal(bsize >= min_size);
if (bsize < min_size) return; // should never happen
size_t new_delta = (bsize - min_size);
mi_assert_internal(new_delta < bsize);
mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize);
mi_track_mem_defined(padding,sizeof(mi_padding_t));
padding->delta = (uint32_t)new_delta;
mi_track_mem_noaccess(padding,sizeof(mi_padding_t));
}
#else
static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(block);
return mi_page_usable_block_size(page);
}
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) {
MI_UNUSED(page);
MI_UNUSED(block);
MI_UNUSED(min_size);
}
#endif
#if MI_PADDING && MI_PADDING_CHECK
static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) {
size_t bsize;
size_t delta;
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
*size = *wrong = bsize;
if (!ok) return false;
mi_assert_internal(bsize >= delta);
*size = bsize - delta;
if (!mi_page_is_huge(page)) {
uint8_t* fill = (uint8_t*)block + bsize - delta;
const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes
mi_track_mem_defined(fill, maxpad);
for (size_t i = 0; i < maxpad; i++) {
if (fill[i] != MI_DEBUG_PADDING) {
*wrong = bsize - delta + i;
ok = false;
break;
}
}
mi_track_mem_noaccess(fill, maxpad);
}
return ok;
}
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
size_t size;
size_t wrong;
if (!mi_verify_padding(page,block,&size,&wrong)) {
_mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong );
}
}
#else
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(page);
MI_UNUSED(block);
}
#endif
// only maintain stats for smaller objects if requested
#if (MI_STAT>0)
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
#if (MI_STAT < 2)
MI_UNUSED(block);
#endif
mi_heap_t* const heap = mi_heap_get_default();
const size_t bsize = mi_page_usable_block_size(page);
#if (MI_STAT>1)
const size_t usize = mi_page_usable_size_of(page, block);
mi_heap_stat_decrease(heap, malloc, usize);
#endif
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
mi_heap_stat_decrease(heap, normal, bsize);
#if (MI_STAT > 1)
mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1);
#endif
}
else {
const size_t bpsize = mi_page_block_size(page); // match stat in page.c:mi_huge_page_alloc
mi_heap_stat_decrease(heap, huge, bpsize);
}
}
#else
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(page); MI_UNUSED(block);
}
#endif

View file

@ -119,7 +119,9 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq
static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
{
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
_mi_deferred_free(heap, collect >= MI_FORCE);
const bool force = (collect >= MI_FORCE);
_mi_deferred_free(heap, force);
// note: never reclaim on collect but leave it to threads that need storage to reclaim
if (
@ -145,22 +147,22 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
_mi_heap_delayed_free_all(heap);
// collect retired pages
_mi_heap_collect_retired(heap, collect >= MI_FORCE);
_mi_heap_collect_retired(heap, force);
// collect all pages owned by this thread
mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
// collect segment and thread caches
if (collect >= MI_FORCE) {
_mi_segment_thread_collect(&heap->tld->segments);
}
// collect segments (purge pages, this can be expensive so don't force on abandonment)
_mi_segments_collect(collect == MI_FORCE, &heap->tld->segments);
// collect arenas on program-exit (or shared library unload)
if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
// if forced, collect thread data cache on program-exit (or shared library unload)
if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
_mi_thread_data_collect(); // collect thread data cache
_mi_arena_collect(true /* force purge */, &heap->tld->stats);
}
// collect arenas (this is program wide so don't force purges on abandonment of threads)
_mi_arenas_collect(collect == MI_FORCE /* force purge? */, &heap->tld->stats);
}
void _mi_heap_collect_abandon(mi_heap_t* heap) {
@ -289,12 +291,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
// stats
const size_t bsize = mi_page_block_size(page);
if (bsize > MI_LARGE_OBJ_SIZE_MAX) {
if (bsize > MI_HUGE_OBJ_SIZE_MAX) {
mi_heap_stat_decrease(heap, giant, bsize);
}
else {
mi_heap_stat_decrease(heap, huge, bsize);
}
mi_heap_stat_decrease(heap, huge, bsize);
}
#if (MI_STAT)
_mi_page_free_collect(page, false); // update used count
@ -418,7 +415,7 @@ void mi_heap_delete(mi_heap_t* heap)
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
if (!mi_heap_is_backing(heap)) {
// tranfer still used pages to the backing heap
// transfer still used pages to the backing heap
mi_heap_absorb(heap->tld->heap_backing, heap);
}
else {
@ -467,8 +464,7 @@ static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa
MI_UNUSED(heap);
MI_UNUSED(pq);
bool* found = (bool*)vfound;
mi_segment_t* segment = _mi_page_segment(page);
void* start = _mi_page_start(segment, page, NULL);
void* start = mi_page_start(page);
void* end = (uint8_t*)start + (page->capacity * mi_page_block_size(page));
*found = (p >= start && p < end);
return (!*found); // continue if not found
@ -514,7 +510,7 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
const size_t bsize = mi_page_block_size(page);
const size_t ubsize = mi_page_usable_block_size(page); // without padding
size_t psize;
uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize);
uint8_t* pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
if (page->capacity == 1) {
// optimize page with one block
@ -581,7 +577,7 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa
xarea.page = page;
xarea.area.reserved = page->reserved * bsize;
xarea.area.committed = page->capacity * bsize;
xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL);
xarea.area.blocks = mi_page_start(page);
xarea.area.used = page->used; // number of blocks in use (#553)
xarea.area.block_size = ubsize;
xarea.area.full_block_size = bsize;

View file

@ -14,16 +14,19 @@ terms of the MIT license. A copy of the license can be found in the file
// Empty page used to initialize the small free pages array
const mi_page_t _mi_page_empty = {
0, false, false, false,
0,
false, false, false, false,
0, // capacity
0, // reserved capacity
{ 0 }, // flags
false, // is_zero
0, // retire_expire
NULL, // free
0, // used
0, // xblock_size
NULL, // local_free
0, // used
0, // block size shift
0, // block_size
NULL, // page_start
#if (MI_PADDING || MI_ENCODE_FREELIST)
{ 0, 0 },
#endif
@ -78,7 +81,8 @@ const mi_page_t _mi_page_empty = {
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), \
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
{ 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \
MI_STAT_COUNT_END_NULL()
// --------------------------------------------------------
@ -92,8 +96,6 @@ const mi_page_t _mi_page_empty = {
mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
NULL,
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
MI_ATOMIC_VAR_INIT(NULL),
0, // tid
0, // cookie
@ -103,7 +105,9 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
0, // page count
MI_BIN_FULL, 0, // page retired min/max
NULL, // next
false
false,
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY
};
@ -120,7 +124,7 @@ static mi_tld_t tld_main = {
0, false,
&_mi_heap_main, &_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0},
0, 0, 0, 0,
0, 0, 0, 0, 0,
&tld_main.stats, &tld_main.os
}, // segments
{ 0, &tld_main.stats }, // os
@ -129,8 +133,6 @@ static mi_tld_t tld_main = {
mi_heap_t _mi_heap_main = {
&tld_main,
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
MI_ATOMIC_VAR_INIT(NULL),
0, // thread id
0, // initial cookie
@ -140,7 +142,9 @@ mi_heap_t _mi_heap_main = {
0, // page count
MI_BIN_FULL, 0, // page retired min/max
NULL, // next heap
false // can reclaim
false, // can reclaim
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY
};
bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`.
@ -221,7 +225,7 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) {
is_zero = memid.initially_zero;
}
}
if (td != NULL && !is_zero) {
_mi_memzero_aligned(td, offsetof(mi_thread_data_t,memid));
}
@ -396,23 +400,23 @@ void mi_thread_done(void) mi_attr_noexcept {
_mi_thread_done(NULL);
}
void _mi_thread_done(mi_heap_t* heap)
void _mi_thread_done(mi_heap_t* heap)
{
// calling with NULL implies using the default heap
if (heap == NULL) {
heap = mi_prim_get_default_heap();
if (heap == NULL) {
heap = mi_prim_get_default_heap();
if (heap == NULL) return;
}
// prevent re-entrancy through heap_done/heap_set_default_direct (issue #699)
if (!mi_heap_is_initialized(heap)) {
return;
return;
}
// adjust stats
mi_atomic_decrement_relaxed(&thread_count);
_mi_stat_decrease(&_mi_stats_main.threads, 1);
// check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps...
if (heap->thread_id != _mi_thread_id()) return;
@ -425,7 +429,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) {
#if defined(MI_TLS_SLOT)
mi_prim_tls_slot_set(MI_TLS_SLOT,heap);
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
*mi_tls_pthread_heap_slot() = heap;
*mi_prim_tls_pthread_heap_slot() = heap;
#elif defined(MI_TLS_PTHREAD)
// we use _mi_heap_default_key
#else
@ -434,7 +438,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) {
// ensure the default heap is passed to `_mi_thread_done`
// setting to a non-NULL value also ensures `mi_thread_done` is called.
_mi_prim_thread_associate_default_heap(heap);
_mi_prim_thread_associate_default_heap(heap);
}
@ -594,7 +598,7 @@ static void mi_cdecl mi_process_done(void) {
// release any thread specific resources and ensure _mi_thread_done is called on all but the main thread
_mi_prim_thread_done_auto_done();
#ifndef MI_SKIP_COLLECT_ON_EXIT
#if (MI_DEBUG || !defined(MI_SHARED_LIB))
// free all memory if possible on process exit. This is not needed for a stand-alone process

View file

@ -210,7 +210,7 @@ void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
if (c == 'x' || c == 'u') {
if (numtype == 'z') x = va_arg(args, size_t);
else if (numtype == 't') x = va_arg(args, uintptr_t); // unsigned ptrdiff_t
else if (numtype == 'L') x = va_arg(args, unsigned long long);
else if (numtype == 'L') x = (uintptr_t)va_arg(args, unsigned long long);
else x = va_arg(args, unsigned long);
}
else if (c == 'p') {
@ -231,7 +231,7 @@ void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
intptr_t x = 0;
if (numtype == 'z') x = va_arg(args, intptr_t );
else if (numtype == 't') x = va_arg(args, ptrdiff_t);
else if (numtype == 'L') x = va_arg(args, long long);
else if (numtype == 'L') x = (intptr_t)va_arg(args, long long);
else x = va_arg(args, long);
char pre = 0;
if (x < 0) {

View file

@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages
{-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N
{ 0, UNINIT, MI_OPTION(reserve_os_memory) },
{ 0, UNINIT, MI_OPTION(reserve_os_memory) }, // reserve OS memory in advance
{ 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread
{ 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free
{ 0, UNINIT, MI_OPTION(abandoned_page_purge) }, // purge free page memory when a thread terminates
@ -77,11 +77,11 @@ static mi_option_desc_t options[_mi_option_last] =
#endif
{ 10, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
{ 0, UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
{ 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output
{ 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output
{ 8, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. number of segment reclaims from the abandoned segments per try.
{ 10, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. percentage of the abandoned segments to reclaim per try.
{ 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
#if (MI_INTPTR_SIZE>4)
{ 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time
@ -89,9 +89,11 @@ static mi_option_desc_t options[_mi_option_last] =
{ 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) },
#endif
{ 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's
{ 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's
{ 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
{ 1024,UNINIT, MI_OPTION(remap_threshold) }, // size in KiB after which realloc starts using OS remap (0 to disable auto remap)
{ 1, UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free
{ 0, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's)
{ 1024,UNINIT, MI_OPTION(remap_threshold) }, // size in KiB after which realloc starts using OS remap (0 to disable auto remap)
};
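
For reference, a minimal usage sketch of the option table above from the public API side. It is not part of the diff; it assumes `mimalloc.h` is on the include path and uses only the documented accessors (`mi_option_set`, `mi_option_get`, `mi_option_is_enabled`). The same options can be set through `MIMALLOC_<NAME>` environment variables (e.g. `MIMALLOC_PURGE_DELAY=25`).

#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  mi_option_set(mi_option_purge_delay, 25);   // purge delay in milliseconds
  printf("purge delay      : %ld ms\n", mi_option_get(mi_option_purge_delay));
  printf("large OS pages on: %d\n", (int)mi_option_is_enabled(mi_option_allow_large_os_pages));
  return 0;
}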

View file

@ -30,7 +30,7 @@ bool _mi_os_has_overcommit(void) {
return mi_os_mem_config.has_overcommit;
}
bool _mi_os_has_virtual_reserve(void) {
bool _mi_os_has_virtual_reserve(void) {
return mi_os_mem_config.has_virtual_reserve;
}
@ -156,7 +156,7 @@ void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats);
static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) {
MI_UNUSED(tld_stats);
MI_UNUSED(tld_stats);
mi_assert_internal((size % _mi_os_page_size()) == 0);
if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr)
int err = _mi_prim_free(addr, size);
@ -212,7 +212,7 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me
}
}
else {
// nothing to do
// nothing to do
mi_assert(memid.memkind < MI_MEM_OS);
}
}
@ -235,24 +235,24 @@ static void* mi_os_prim_alloc_at(void* hint, size_t size, size_t try_alignment,
if (size == 0) return NULL;
if (!commit) { allow_large = false; }
if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning
*is_zero = false;
void* p = NULL;
int err = _mi_prim_alloc(hint, size, try_alignment, commit, allow_large, is_large, is_zero, &p);
if (err != 0) {
_mi_warning_message("unable to allocate OS memory (error: %d (0x%02x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large);
}
mi_stat_counter_increase(stats->mmap_calls, 1);
if (p != NULL) {
_mi_stat_increase(&stats->reserved, size);
if (commit) {
_mi_stat_increase(&stats->committed, size);
if (commit) {
_mi_stat_increase(&stats->committed, size);
// seems needed for asan (or `mimalloc-test-api` fails)
#ifdef MI_TRACK_ASAN
if (*is_zero) { mi_track_mem_defined(p,size); }
else { mi_track_mem_undefined(p,size); }
#endif
}
}
}
return p;
}
@ -338,10 +338,8 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
OS API: alloc and alloc_aligned
----------------------------------------------------------- */
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) {
MI_UNUSED(tld_stats);
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
*memid = _mi_memid_none();
mi_stats_t* stats = &_mi_stats_main;
if (size == 0) return NULL;
size = _mi_os_good_alloc_size(size);
bool os_is_large = false;
@ -361,18 +359,18 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
if (size == 0) return NULL;
size = _mi_os_good_alloc_size(size);
alignment = _mi_align_up(alignment, _mi_os_page_size());
return mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, memid, &_mi_stats_main /*tld->stats*/ );
return mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, memid, &_mi_stats_main );
}
/* -----------------------------------------------------------
OS aligned allocation with an offset. This is used
for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc
for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc
page where the object can be aligned at an offset from the start of the segment.
As we may need to overallocate, we need to free such pointers using `mi_free_aligned`
to use the actual start of the memory region.
----------------------------------------------------------- */
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) {
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats) {
mi_assert(offset <= MI_SEGMENT_SIZE);
mi_assert(offset <= size);
mi_assert((alignment % _mi_os_page_size()) == 0);
@ -380,20 +378,20 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offse
if (offset > MI_SEGMENT_SIZE) return NULL;
if (offset == 0) {
// regular aligned allocation
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld_stats);
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, stats);
}
else {
// overallocate to align at an offset
const size_t extra = _mi_align_up(offset, alignment) - offset;
const size_t oversize = mi_os_get_alloc_size(size + extra);
void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats);
void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, stats);
if (start == NULL) return NULL;
void* const p = (uint8_t*)start + extra;
mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
// decommit the overallocation at the start
if (memid->initially_committed && !memid->is_pinned && (extra > _mi_os_page_size())) {
_mi_os_decommit(start, extra, tld_stats);
_mi_os_decommit(start, extra, stats);
}
return p;
}
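
Illustrative arithmetic only (not mimalloc internals): how the `extra` prefix computed above makes the object at `p + offset` land on the requested alignment, assuming the OS returned an `alignment`-aligned block. The concrete numbers (4096-byte alignment, offset 100, start address) are made up for the example.

#include <assert.h>
#include <stdint.h>

static size_t align_up(size_t n, size_t align) {
  return (n + align - 1) & ~(align - 1);   // align must be a power of two
}

int main(void) {
  const size_t    alignment = 4096, offset = 100;
  const uintptr_t start = 0x10000000;                          // hypothetical aligned OS block
  const size_t    extra = align_up(offset, alignment) - offset; // 4096 - 100 = 3996
  const uintptr_t p = start + extra;
  assert((p + offset) % alignment == 0);                        // object at `p + offset` is aligned
  return 0;
}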
@ -596,7 +594,7 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t*
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
MI_UNUSED(tld_stats);
mi_stats_t* stats = &_mi_stats_main;
mi_stats_t* stats = &_mi_stats_main;
if (is_zero != NULL) { *is_zero = false; }
_mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit
_mi_stat_counter_increase(&stats->commit_calls, 1);
@ -606,21 +604,21 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats
void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize);
if (csize == 0) return true;
// commit
// commit
bool os_is_zero = false;
int err = _mi_prim_commit(start, csize, &os_is_zero);
int err = _mi_prim_commit(start, csize, &os_is_zero);
if (err != 0) {
_mi_warning_message("cannot commit OS memory (error: %d (0x%02x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
return false;
}
if (os_is_zero && is_zero != NULL) {
if (os_is_zero && is_zero != NULL) {
*is_zero = true;
mi_assert_expensive(mi_mem_is_zero(start, csize));
}
// note: the following seems required for asan (otherwise `mimalloc-test-stress` fails)
#ifdef MI_TRACK_ASAN
if (os_is_zero) { mi_track_mem_defined(start,csize); }
else { mi_track_mem_undefined(start,csize); }
else { mi_track_mem_undefined(start,csize); }
#endif
return true;
}
@ -634,11 +632,11 @@ static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_
// page align
size_t csize;
void* start = mi_os_page_align_area_conservative(addr, size, &csize);
if (csize == 0) return true;
if (csize == 0) return true;
// decommit
*needs_recommit = true;
int err = _mi_prim_decommit(start,csize,needs_recommit);
int err = _mi_prim_decommit(start,csize,needs_recommit);
if (err != 0) {
_mi_warning_message("cannot decommit OS memory (error: %d (0x%02x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize);
}
@ -656,7 +654,7 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) {
// but may be used later again. This will release physical memory
// pages and reduce swapping while keeping the memory committed.
// We page align to a conservative area inside the range to reset.
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
// page align conservatively within the range
size_t csize;
void* start = mi_os_page_align_area_conservative(addr, size, &csize);
@ -676,7 +674,7 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
}
// either resets or decommits memory, returns true if the memory needs
// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats)
{
@ -689,7 +687,7 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats)
{
bool needs_recommit = true;
mi_os_decommit_ex(p, size, &needs_recommit, stats);
return needs_recommit;
return needs_recommit;
}
else {
if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed
@ -699,7 +697,7 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats)
}
}
// either resets or decommits memory, returns true if the memory needs
// either resets or decommits memory, returns true if the memory needs
// to be recommitted if it is to be re-used later on.
bool _mi_os_purge(void* p, size_t size, mi_stats_t * stats) {
return _mi_os_purge_ex(p, size, true, stats);

View file

@ -1,5 +1,5 @@
/*----------------------------------------------------------------------------
Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -11,6 +11,10 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MI_IN_PAGE_C
#error "this file should be included from 'page.c'"
// include to help an IDE
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#endif
/* -----------------------------------------------------------
@ -109,10 +113,10 @@ size_t _mi_bin_size(uint8_t bin) {
// Good size for allocation
size_t mi_good_size(size_t size) mi_attr_noexcept {
if (size <= MI_LARGE_OBJ_SIZE_MAX) {
return _mi_bin_size(mi_bin(size));
return _mi_bin_size(mi_bin(size + MI_PADDING_SIZE));
}
else {
return _mi_align_up(size,_mi_os_page_size());
return _mi_align_up(size + MI_PADDING_SIZE,_mi_os_page_size());
}
}
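
A hedged usage sketch of the public `mi_good_size` API adjusted above: requesting the "good" size up front keeps a later in-place growth within the same size class. It assumes `mimalloc.h` is available; the exact value returned depends on the build, since the padding term is only non-zero in padded (debug) builds.

#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  size_t request = 100;
  size_t good = mi_good_size(request);   // rounds up to the bin size (incl. padding if enabled)
  void* p = mi_malloc(good);
  if (p == NULL) return 1;
  printf("requested %zu, good size %zu, usable %zu\n",
         request, good, mi_usable_size(p));
  mi_free(p);
  return 0;
}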
@ -137,21 +141,21 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t*
}
#endif
static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size));
mi_heap_t* heap = mi_page_heap(page);
mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL);
static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
mi_assert_internal(heap!=NULL);
uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page))));
mi_assert_internal(bin <= MI_BIN_FULL);
mi_page_queue_t* pq = &heap->pages[bin];
mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size);
mi_assert_expensive(mi_page_queue_contains(pq, page));
mi_assert_internal((mi_page_block_size(page) == pq->block_size) ||
(mi_page_is_huge(page) && mi_page_queue_is_huge(pq)) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(pq)));
return pq;
}
static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size));
mi_assert_internal(bin <= MI_BIN_FULL);
mi_page_queue_t* pq = &heap->pages[bin];
mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size);
static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
mi_heap_t* heap = mi_page_heap(page);
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
mi_assert_expensive(mi_page_queue_contains(pq, page));
return pq;
}
@ -206,7 +210,9 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) {
static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_expensive(mi_page_queue_contains(queue, page));
mi_assert_internal(page->xblock_size == queue->block_size || (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
(mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_heap_t* heap = mi_page_heap(page);
if (page->prev != NULL) page->prev->next = page->next;
if (page->next != NULL) page->next->prev = page->prev;
@ -231,9 +237,9 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
#endif
mi_assert_internal((page->xblock_size == queue->block_size) ||
(mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) || // not: && page->xblock_size > MI_LARGE_OBJ_SIZE_MAX since it could be due to page alignment
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)) );
mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
(mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_page_set_in_full(page, mi_page_queue_is_full(queue));
// mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap);
@ -258,11 +264,13 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
mi_assert_internal(page != NULL);
mi_assert_expensive(mi_page_queue_contains(from, page));
mi_assert_expensive(!mi_page_queue_contains(to, page));
mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) ||
(page->xblock_size == to->block_size && mi_page_queue_is_full(from)) ||
(page->xblock_size == from->block_size && mi_page_queue_is_full(to)) ||
(mi_page_is_huge(page) && mi_page_queue_is_huge(from)) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(to)));
const size_t bsize = mi_page_block_size(page);
MI_UNUSED(bsize);
mi_assert_internal((bsize == to->block_size && bsize == from->block_size) ||
(bsize == to->block_size && mi_page_queue_is_full(from)) ||
(bsize == from->block_size && mi_page_queue_is_full(to)) ||
(mi_page_is_huge(page) && mi_page_queue_is_huge(to)) ||
(mi_page_is_huge(page) && mi_page_queue_is_full(to)));
mi_heap_t* heap = mi_page_heap(page);
if (page->prev != NULL) page->prev->next = page->next;

View file

@ -1,5 +1,5 @@
/*----------------------------------------------------------------------------
Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -59,7 +59,7 @@ static inline uint8_t* mi_page_area(const mi_page_t* page) {
static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
size_t psize;
uint8_t* page_area = _mi_page_start(_mi_page_segment(page), page, &psize);
uint8_t* page_area = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
mi_block_t* start = (mi_block_t*)page_area;
mi_block_t* end = (mi_block_t*)(page_area + psize);
while(p != NULL) {
@ -78,14 +78,15 @@ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
}
static bool mi_page_is_valid_init(mi_page_t* page) {
mi_assert_internal(page->xblock_size > 0);
mi_assert_internal(mi_page_block_size(page) > 0);
mi_assert_internal(page->used <= page->capacity);
mi_assert_internal(page->capacity <= page->reserved);
const size_t bsize = mi_page_block_size(page);
// const size_t bsize = mi_page_block_size(page);
mi_segment_t* segment = _mi_page_segment(page);
uint8_t* start = _mi_page_start(segment,page,NULL);
mi_assert_internal(start == _mi_segment_page_start(segment,page,bsize,NULL,NULL));
uint8_t* start = mi_page_start(page);
mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL));
mi_assert_internal(page->is_huge == (segment->page_kind == MI_PAGE_HUGE));
//mi_assert_internal(start + page->capacity*page->block_size == page->top);
mi_assert_internal(mi_page_list_is_valid(page,page->free));
@ -198,8 +199,8 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
if (head == NULL) return;
// find the tail -- also to get a proper count (without data races)
uint32_t max_count = page->capacity; // cannot collect more than capacity
uint32_t count = 1;
size_t max_count = page->capacity; // cannot collect more than capacity
size_t count = 1;
mi_block_t* tail = head;
mi_block_t* next;
while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) {
@ -217,7 +218,7 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
page->local_free = head;
// update counts now
page->used -= count;
page->used -= (uint16_t)count;
}
void _mi_page_free_collect(mi_page_t* page, bool force) {
@ -267,7 +268,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
#endif
// TODO: push on full queue immediately if it is full?
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
mi_page_queue_push(heap, pq, page);
@ -289,13 +290,11 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
#endif
mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size);
// a fresh page was found, initialize it
const size_t xblock_size = ((pq == NULL || mi_page_is_huge(page)) ? MI_HUGE_BLOCK_SIZE : block_size);
//((pq == NULL || mi_page_queue_is_huge(pq)) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc
//mi_assert_internal(xblock_size >= block_size);
mi_page_init(heap, page, xblock_size, heap->tld);
mi_assert_internal(mi_page_block_size(page) >= block_size);
const size_t full_block_size = (pq == NULL || mi_page_is_huge(page) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc
mi_assert_internal(full_block_size >= block_size);
mi_page_init(heap, page, full_block_size, heap->tld);
mi_heap_stat_increase(heap, pages, 1);
if (pq != NULL) { mi_page_queue_push(heap, pq, page); }
mi_assert_expensive(_mi_page_is_valid(page));
@ -424,7 +423,7 @@ void _mi_heap_huge_page_detach(mi_heap_t* heap, mi_page_t* page) {
void _mi_heap_huge_page_attach(mi_heap_t* heap, mi_page_t* page) {
mi_assert_internal(mi_page_heap(page) == heap);
#if !MI_HUGE_PAGE_ABANDON
mi_page_queue_t* pq = mi_page_queue(heap, MI_HUGE_OBJ_SIZE_MAX); // not block_size as that can be low if the page_alignment > 0
mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_OBJ_SIZE_MAX+1); // not block_size as that can be low if the page_alignment > 0
mi_assert_internal(mi_page_queue_is_huge(pq));
mi_page_queue_push(heap, pq, page);
#endif
@ -453,7 +452,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
_mi_segment_page_free(page, force, segments_tld);
}
#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX
#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE
#define MI_RETIRE_CYCLES (16)
// Retire a page with no more used blocks
@ -476,10 +475,11 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
// how to check this efficiently though...
// for now, we don't retire if it is the only page left of this size class.
mi_page_queue_t* pq = mi_page_queue_of(page);
if mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_queue_is_special(pq)) { // not too large && not full or huge queue?
const size_t bsize = mi_page_block_size(page);
if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue?
if (pq->last==page && pq->first==page) { // the only page in the queue?
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
page->retire_expire = (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
mi_heap_t* heap = mi_page_heap(page);
mi_assert_internal(pq >= heap->pages);
const size_t index = pq - heap->pages;
@ -487,7 +487,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
if (index < heap->page_retired_min) heap->page_retired_min = index;
if (index > heap->page_retired_max) heap->page_retired_max = index;
mi_assert_internal(mi_page_all_free(page));
return; // dont't free after all
return; // don't free after all
}
}
@ -542,7 +542,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co
#endif
mi_assert_internal(page->capacity + extend <= page->reserved);
mi_assert_internal(bsize == mi_page_block_size(page));
void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL);
void* const page_area = mi_page_start(page);
// initialize a randomized free list
// set up `slice_count` slices to alternate between
@ -600,7 +600,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
#endif
mi_assert_internal(page->capacity + extend <= page->reserved);
mi_assert_internal(bsize == mi_page_block_size(page));
void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
void* const page_area = mi_page_start(page);
mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity);
@ -644,15 +644,15 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
size_t page_size;
//uint8_t* page_start =
_mi_page_start(_mi_page_segment(page), page, &page_size);
_mi_segment_page_start(_mi_page_segment(page), page, &page_size);
mi_stat_counter_increase(tld->stats.pages_extended, 1);
// calculate the extend count
const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size);
const size_t bsize = mi_page_block_size(page);
size_t extend = page->reserved - page->capacity;
mi_assert_internal(extend > 0);
size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize);
size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/bsize);
if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; }
mi_assert_internal(max_extend > 0);
@ -686,20 +686,12 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
mi_assert_internal(block_size > 0);
// set fields
mi_page_set_heap(page, heap);
page->block_size = block_size;
size_t page_size;
const void* page_start = _mi_segment_page_start(segment, page, block_size, &page_size, NULL);
MI_UNUSED(page_start);
mi_track_mem_noaccess(page_start,page_size);
if (segment->page_kind == MI_PAGE_HUGE) {
page->xblock_size = MI_HUGE_BLOCK_SIZE;
page->reserved = 1;
}
else {
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
page->xblock_size = (uint32_t)block_size;
mi_assert_internal(page_size / block_size < (1L << 16));
page->reserved = (uint16_t)(page_size / block_size);
}
page->page_start = _mi_segment_page_start(segment, page, &page_size);
mi_track_mem_noaccess(page->page_start,page_size);
mi_assert_internal(page_size / block_size < (1L<<16));
page->reserved = (uint16_t)(page_size / block_size);
mi_assert_internal(page->reserved > 0);
#if (MI_PADDING || MI_ENCODE_FREELIST)
page->keys[0] = _mi_heap_random_next(heap);
@ -708,11 +700,17 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
page->free_is_zero = page->is_zero_init;
#if MI_DEBUG>2
if (page->is_zero_init) {
mi_track_mem_defined(page_start, page_size);
mi_assert_expensive(!page->is_zero_init || mi_mem_is_zero(page_start, page_size));
mi_track_mem_defined(page->page_start, page_size);
mi_assert_expensive(mi_mem_is_zero(page->page_start, page_size));
}
#endif
if (block_size > 0 && _mi_is_power_of_two(block_size)) {
page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size));
}
else {
page->block_size_shift = 0;
}
mi_assert_internal(page->capacity == 0);
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->used == 0);
@ -725,6 +723,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
mi_assert_internal(page->keys[0] != 0);
mi_assert_internal(page->keys[1] != 0);
#endif
mi_assert_internal(page->block_size_shift == 0 || (block_size == ((size_t)1 << page->block_size_shift)));
mi_assert_expensive(mi_page_is_valid_init(page));
// initialize an initial free list
@ -748,7 +747,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
while (page != NULL)
{
mi_page_t* next = page->next; // remember next
#if MI_STAT
#if MI_STAT
count++;
#endif
@ -849,39 +848,31 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex
General allocation
----------------------------------------------------------- */
// A huge page always occupies a single segment.
// It is used for large allocations, (very) large alignments (> MI_ALIGNMENT_MAX), or remappable blocks.
// When a huge page is freed from another thread, it is immediately reset to reduce memory pressure.
// We use a page_alignment of 1 for remappable memory.
// Huge pages contain just one block, and the segment contains just that page.
// Huge pages are also used if the requested alignment is very large (> MI_BLOCK_ALIGNMENT_MAX)
// so their size is not always `> MI_LARGE_OBJ_SIZE_MAX`.
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) {
size_t block_size = _mi_os_good_alloc_size(size);
mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0);
#if MI_HUGE_PAGE_ABANDON
mi_page_queue_t* pq = NULL;
#else
mi_page_queue_t* pq = mi_page_queue(heap, MI_HUGE_OBJ_SIZE_MAX); // not block_size as that can be low if the page_alignment > 0
mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_OBJ_SIZE_MAX+1); // always in the huge queue regardless of the block size
mi_assert_internal(mi_page_queue_is_huge(pq));
#endif
mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment);
if (page != NULL) {
const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding already
mi_assert_internal(bsize >= size);
mi_assert_internal(mi_page_block_size(page) >= size);
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE);
mi_assert_internal(mi_page_is_huge(page));
mi_assert_internal(_mi_page_segment(page)->page_kind == MI_PAGE_HUGE);
mi_assert_internal(_mi_page_segment(page)->used==1);
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
mi_page_set_heap(page, NULL);
#endif
if (bsize > MI_HUGE_OBJ_SIZE_MAX) {
mi_heap_stat_increase(heap, giant, bsize);
mi_heap_stat_counter_increase(heap, giant_count, 1);
}
else {
mi_heap_stat_increase(heap, huge, bsize);
mi_heap_stat_counter_increase(heap, huge_count, 1);
}
mi_heap_stat_increase(heap, huge, mi_page_block_size(page));
mi_heap_stat_counter_increase(heap, huge_count, 1);
}
return page;
}
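
A user-level sketch (not part of this diff) of when the huge path above is taken: a request larger than `MI_LARGE_OBJ_SIZE_MAX` is served from its own huge page, one block per segment. The 32 MiB size is an assumption chosen to exceed that limit on a typical 64-bit build.

#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  size_t big = 32 * 1024 * 1024;        // assumed > MI_LARGE_OBJ_SIZE_MAX
  void* p = mi_malloc(big);             // expected to land on a huge page
  if (p == NULL) return 1;
  printf("usable size: %zu\n", mi_usable_size(p));   // at least 32 MiB
  mi_free(p);
  return 0;
}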
@ -893,7 +884,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme
// huge allocation?
const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size`
if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) {
if mi_unlikely(req_size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) {
_mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
return NULL;
}
@ -904,7 +895,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme
else {
// otherwise find a page with free blocks in our size segregated queues
#if MI_PADDING
mi_assert_internal(size >= MI_PADDING_SIZE);
mi_assert_internal(size >= MI_PADDING_SIZE);
#endif
return mi_find_free_page(heap, size);
}
@ -920,7 +911,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
// initialize if necessary
if mi_unlikely(!mi_heap_is_initialized(heap)) {
heap = mi_heap_get_default(); // calls mi_thread_init
heap = mi_heap_get_default(); // calls mi_thread_init
if mi_unlikely(!mi_heap_is_initialized(heap)) { return NULL; }
}
mi_assert_internal(mi_heap_is_initialized(heap));
@ -950,7 +941,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
// and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
if mi_unlikely(zero && mi_page_is_huge(page)) {
// note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case.
void* p = _mi_page_malloc(heap, page, size, false);
void* p = _mi_page_malloc(heap, page, size);
mi_assert_internal(p != NULL);
if (page->free_is_zero) {
((mi_block_t*)p)->next = 0;
@ -962,6 +953,6 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
return p;
}
else {
return _mi_page_malloc(heap, page, size, zero);
return _mi_page_malloc_zero(heap, page, size, zero);
}
}

244
src/prim/emscripten/prim.c Normal file
View file

@ -0,0 +1,244 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen, Alon Zakai
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
// This file is included in `src/prim/prim.c`
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"
// Design
// ======
//
// mimalloc is built on top of emmalloc. emmalloc is a minimal allocator on top
// of sbrk. The reason for having three layers here is that we want mimalloc to
// be able to allocate and release system memory properly, the same way it would
// when using VirtualAlloc on Windows or mmap on POSIX, and sbrk is too limited.
// Specifically, sbrk can only go up and down, and not "skip" over regions, and
// so we end up either never freeing memory to the system, or we can get stuck
// with holes.
//
// At the moment, wasm generally does *not* free memory back to the system: once grown, we do
// not shrink back down (https://github.com/WebAssembly/design/issues/1397).
// However, that is expected to improve
// (https://github.com/WebAssembly/memory-control/blob/main/proposals/memory-control/Overview.md)
// and so we do not want to bake those limitations in here.
//
// Even without that issue, we want our system allocator to handle holes, that
// is, it should merge freed regions and allow allocating new content there of
// the full size, etc., so that we do not waste space. That means that the
// system allocator really does need to handle the general problem of allocating
// and freeing variable-sized chunks of memory in a random order, like malloc/
// free do. And so it makes sense to layer mimalloc on top of such an
// implementation.
//
// emmalloc makes sense for the lower level because it is small and simple while
// still fully handling merging of holes etc. It is not the most efficient
// allocator, but our assumption is that mimalloc needs to be fast while the
// system allocator underneath it is called much less frequently.
//
//---------------------------------------------
// init
//---------------------------------------------
void _mi_prim_mem_init( mi_os_mem_config_t* config) {
config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB
config->alloc_granularity = 16;
config->has_overcommit = false;
config->must_free_whole = true;
config->has_virtual_reserve = false;
}
extern void emmalloc_free(void*);
int _mi_prim_free(void* addr, size_t size) {
MI_UNUSED(size);
emmalloc_free(addr);
return 0;
}
//---------------------------------------------
// Allocation
//---------------------------------------------
extern void* emmalloc_memalign(size_t alignment, size_t size);
// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
MI_UNUSED(try_alignment); MI_UNUSED(allow_large); MI_UNUSED(commit);
*is_large = false;
// TODO: Track the highest address ever seen; first uses of it are zeroes.
// That assumes no one else uses sbrk but us (they could go up,
// scribble, and then down), but we could assert on that perhaps.
*is_zero = false;
// emmalloc has a minimum alignment size.
#define MIN_EMMALLOC_ALIGN 8
if (try_alignment < MIN_EMMALLOC_ALIGN) {
try_alignment = MIN_EMMALLOC_ALIGN;
}
void* p = emmalloc_memalign(try_alignment, size);
*addr = p;
if (p == 0) {
return ENOMEM;
}
return 0;
}
//---------------------------------------------
// Commit/Reset
//---------------------------------------------
int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
MI_UNUSED(addr); MI_UNUSED(size);
// See TODO above.
*is_zero = false;
return 0;
}
int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
MI_UNUSED(addr); MI_UNUSED(size);
*needs_recommit = false;
return 0;
}
int _mi_prim_reset(void* addr, size_t size) {
MI_UNUSED(addr); MI_UNUSED(size);
return 0;
}
int _mi_prim_protect(void* addr, size_t size, bool protect) {
MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect);
return 0;
}
//---------------------------------------------
// Huge pages and NUMA nodes
//---------------------------------------------
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
*is_zero = true;
*addr = NULL;
return ENOSYS;
}
size_t _mi_prim_numa_node(void) {
return 0;
}
size_t _mi_prim_numa_node_count(void) {
return 1;
}
//----------------------------------------------------------------
// Clock
//----------------------------------------------------------------
#include <emscripten/html5.h>
mi_msecs_t _mi_prim_clock_now(void) {
return emscripten_date_now();
}
//----------------------------------------------------------------
// Process info
//----------------------------------------------------------------
void _mi_prim_process_info(mi_process_info_t* pinfo)
{
// use defaults
MI_UNUSED(pinfo);
}
//----------------------------------------------------------------
// Output
//----------------------------------------------------------------
#include <emscripten/console.h>
void _mi_prim_out_stderr( const char* msg) {
emscripten_console_error(msg);
}
//----------------------------------------------------------------
// Environment
//----------------------------------------------------------------
bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
// For code size reasons, do not support environ customization for now.
MI_UNUSED(name);
MI_UNUSED(result);
MI_UNUSED(result_size);
return false;
}
//----------------------------------------------------------------
// Random
//----------------------------------------------------------------
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
int err = getentropy(buf, buf_len);
return !err;
}
//----------------------------------------------------------------
// Thread init/done
//----------------------------------------------------------------
#ifdef __EMSCRIPTEN_SHARED_MEMORY__
// use pthread local storage keys to detect thread ending
// (and used with MI_TLS_PTHREADS for the default heap)
pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1);
static void mi_pthread_done(void* value) {
if (value!=NULL) {
_mi_thread_done((mi_heap_t*)value);
}
}
void _mi_prim_thread_init_auto_done(void) {
mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
}
void _mi_prim_thread_done_auto_done(void) {
// nothing to do
}
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on FreeBSD
pthread_setspecific(_mi_heap_default_key, heap);
}
}
#else
void _mi_prim_thread_init_auto_done(void) {
// nothing
}
void _mi_prim_thread_done_auto_done(void) {
// nothing
}
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
MI_UNUSED(heap);
}
#endif

View file

@ -225,7 +225,9 @@ static malloc_zone_t mi_malloc_zone = {
// switch to version 9+ on OSX 10.6 to support memalign.
.memalign = &zone_memalign,
.free_definite_size = &zone_free_definite_size,
#if defined(MAC_OS_X_VERSION_10_7) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)
.pressure_relief = &zone_pressure_relief,
#endif
#if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14)
.claimed_address = &zone_claimed_address,
#endif

View file

@ -18,6 +18,9 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_USE_SBRK
#include "wasi/prim.c" // memory-grow or sbrk (Wasm)
#elif defined(__EMSCRIPTEN__)
#include "emscripten/prim.c" // emmalloc_*, + pthread support
#else
#include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.)

View file

@ -31,19 +31,26 @@ terms of the MIT license. A copy of the license can be found in the file
#include <sys/mman.h> // mmap
#include <unistd.h> // sysconf
#include <fcntl.h> // open, close, read, access
#if defined(__linux__)
#include <features.h>
#include <fcntl.h>
#if defined(MI_NO_THP)
#include <sys/prctl.h>
#endif
#if defined(__GLIBC__)
#include <linux/mman.h> // linux mmap flags
#else
#include <sys/mman.h>
#endif
#elif defined(__APPLE__)
#include <AvailabilityMacros.h>
#include <TargetConditionals.h>
#if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR
#include <mach/vm_statistics.h>
#if !defined(TARGET_OS_OSX) || TARGET_OS_OSX // see issue #879, used to be (!TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR)
#include <mach/vm_statistics.h> // VM_MAKE_TAG, VM_FLAGS_SUPERPAGE_SIZE_2MB, etc.
#endif
#if !defined(MAC_OS_X_VERSION_10_7)
#define MAC_OS_X_VERSION_10_7 1070
#endif
#elif defined(__FreeBSD__) || defined(__DragonFly__)
#include <sys/param.h>
@ -54,17 +61,19 @@ terms of the MIT license. A copy of the license can be found in the file
#include <sys/sysctl.h>
#endif
#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__)
#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__) && !defined(__OpenBSD__) && !defined(__sun)
#define MI_HAS_SYSCALL_H
#include <sys/syscall.h>
#endif
//------------------------------------------------------------------------------------
// Use syscalls for some primitives to allow for libraries that override open/read/close etc.
// and do allocation themselves; using syscalls prevents recursion when mimalloc is
// still initializing (issue #713)
//------------------------------------------------------------------------------------
#if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access)
static int mi_prim_open(const char* fpath, int open_flags) {
@ -80,7 +89,8 @@ static int mi_prim_access(const char *fpath, int mode) {
return syscall(SYS_access,fpath,mode);
}
#elif !defined(__APPLE__) // avoid unused warnings
#elif !defined(__sun) && \
(!defined(__APPLE__) || (MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_7)) // avoid unused warnings on macOS and Solaris
static int mi_prim_open(const char* fpath, int open_flags) {
return open(fpath,open_flags);
@ -129,7 +139,8 @@ static bool mi_unix_detect_overcommit(void) {
return os_overcommit;
}
void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
void _mi_prim_mem_init( mi_os_mem_config_t* config )
{
long psize = sysconf(_SC_PAGESIZE);
if (psize > 0) {
config->page_size = (size_t)psize;
@ -142,6 +153,24 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
#if defined(MREMAP_MAYMOVE) && defined(MREMAP_FIXED)
config->has_remap = true;
#endif
// disable transparent huge pages for this process?
#if (defined(__linux__) || defined(__ANDROID__)) && defined(PR_GET_THP_DISABLE)
#if defined(MI_NO_THP)
if (true)
#else
if (!mi_option_is_enabled(mi_option_allow_large_os_pages)) // disable THP also if large OS pages are not allowed in the options
#endif
{
int val = 0;
if (prctl(PR_GET_THP_DISABLE, &val, 0, 0, 0) != 0) {
// Most likely since distros often come with always/madvise settings.
val = 1;
// Disabling only for mimalloc process rather than touching system wide settings
(void)prctl(PR_SET_THP_DISABLE, &val, 0, 0, 0);
}
}
#endif
}
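
A freestanding sketch of the same per-process THP opt-out performed in the hunk above, using only the documented Linux `prctl` options (`PR_SET_THP_DISABLE` / `PR_GET_THP_DISABLE`, available since Linux 3.15). It is illustrative and not mimalloc code.

#include <stdio.h>
#include <sys/prctl.h>

int main(void) {
  // 1 = disable transparent huge pages for this process only
  if (prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0) != 0) {
    perror("PR_SET_THP_DISABLE");
    return 1;
  }
  // PR_GET_THP_DISABLE reports the flag as the return value
  int thp_disabled = prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0);
  printf("THP disabled for this process: %d\n", thp_disabled);
  return 0;
}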
@ -283,7 +312,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
*is_large = true;
p = mi_unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd);
#ifdef MAP_HUGE_1GB
if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) {
if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) {
mi_huge_pages_available = false; // don't try huge 1GiB pages again
_mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno);
lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB);
@ -317,7 +346,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
#elif defined(__sun)
if (allow_large && _mi_os_use_large_page(size, try_alignment)) {
struct memcntl_mha cmd = {0};
cmd.mha_pagesize = large_os_page_size;
cmd.mha_pagesize = _mi_os_large_page_size();
cmd.mha_cmd = MHA_MAPSIZE_VA;
if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
*is_large = true;
@ -738,28 +767,20 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
// Random
//----------------------------------------------------------------
#if defined(__APPLE__)
#include <AvailabilityMacros.h>
#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10
#if defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_15) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_15)
#include <CommonCrypto/CommonCryptoError.h>
#include <CommonCrypto/CommonRandom.h>
#endif
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
#if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15
// We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf
// may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
#else
// fall back on older macOS
arc4random_buf(buf, buf_len);
return true;
#endif
// We prefer CCRandomGenerateBytes as it returns an error code while arc4random_buf
// may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
}
#elif defined(__ANDROID__) || defined(__DragonFly__) || \
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__sun)
defined(__sun) || \
(defined(__APPLE__) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7))
#include <stdlib.h>
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
@ -767,11 +788,10 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) {
return true;
}
#elif defined(__linux__) || defined(__HAIKU__)
#elif defined(__APPLE__) || defined(__linux__) || defined(__HAIKU__) // also for old apple versions < 10.7 (issue #829)
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
bool _mi_prim_random_buf(void* buf, size_t buf_len) {
@ -842,7 +862,9 @@ void _mi_prim_thread_init_auto_done(void) {
}
void _mi_prim_thread_done_auto_done(void) {
// nothing to do
if (_mi_heap_default_key != (pthread_key_t)(-1)) { // do not leak the key, see issue #809
pthread_key_delete(_mi_heap_default_key);
}
}
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {

View file

@ -12,6 +12,9 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"
#include <stdio.h> // fputs
#include <stdlib.h> // getenv
//---------------------------------------------
// Initialize
//---------------------------------------------
@ -41,6 +44,8 @@ int _mi_prim_free(void* addr, size_t size ) {
//---------------------------------------------
#if defined(MI_USE_SBRK)
#include <unistd.h> // for sbrk
static void* mi_memory_grow( size_t size ) {
void* p = sbrk(size);
if (p == (void*)(-1)) return NULL;

View file

@ -16,7 +16,9 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#if (MI_INTPTR_SIZE==8)
#if (MI_INTPTR_SIZE>=8) && MI_TRACK_ASAN
#define MI_MAX_ADDRESS ((size_t)140 << 40) // 140TB (see issue #881)
#elif (MI_INTPTR_SIZE >= 8)
#define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB (to include huge page areas)
#else
#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb
@ -29,6 +31,7 @@ terms of the MIT license. A copy of the license can be found in the file
static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments
static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
// note: segment can be invalid or NULL.
mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE?
if ((uintptr_t)segment >= MI_MAX_ADDRESS) {
*bitidx = 0;
@ -70,8 +73,7 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment) {
// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
static mi_segment_t* _mi_segment_of(const void* p) {
if (p == NULL) return NULL;
mi_segment_t* segment = _mi_ptr_segment(p);
mi_assert_internal(segment != NULL);
mi_segment_t* segment = _mi_ptr_segment(p); // segment can be NULL
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
// fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge

View file

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -26,14 +26,15 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_
- small pages (64KiB), 64 in one segment
- medium pages (512KiB), 8 in one segment
- large pages (4MiB), 1 in one segment
- huge blocks > MI_LARGE_OBJ_SIZE_MAX become large segment with 1 page
- huge segments have a single page that can be larger than `MI_SEGMENT_SIZE`;
it is used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or with alignment `> MI_BLOCK_ALIGNMENT_MAX`.
In any case the memory for a segment is virtual and usually committed on demand.
The memory for a segment is usually committed on demand.
(i.e. we are careful to not touch the memory until we actually allocate a block there)
If a thread ends, it "abandons" pages with used blocks
and there is an abandoned segment list whose segments can
be reclaimed by still running threads, much like work-stealing.
If a thread ends, it "abandons" pages that still contain live blocks.
Such segments are abandoned and can be reclaimed by still running threads
(much like work-stealing).
-------------------------------------------------------------------------------- */
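
Illustrative check of the page counts quoted in the comment above (4 MiB segments with 64 KiB small pages, 512 KiB medium pages, and a single large page per segment); the constants are taken from that comment, not from the headers.

#include <assert.h>

#define KiB (1024UL)
#define MiB (1024UL * KiB)

int main(void) {
  assert((4*MiB) / (64*KiB)  == 64);  // 64 small pages per segment
  assert((4*MiB) / (512*KiB) == 8);   // 8 medium pages per segment
  assert((4*MiB) / (4*MiB)   == 1);   // 1 large page per segment
  return 0;
}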
@ -143,6 +144,7 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t*
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(segment->used <= segment->capacity);
mi_assert_internal(segment->abandoned <= segment->used);
mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || segment->capacity == 1); // one large or huge page per segment
size_t nfree = 0;
for (size_t i = 0; i < segment->capacity; i++) {
const mi_page_t* const page = &segment->pages[i];
@ -152,6 +154,7 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t*
if (page->segment_in_use) {
mi_assert_expensive(!mi_pages_purge_contains(page, tld));
}
mi_assert_internal(page->is_huge == (segment->page_kind == MI_PAGE_HUGE));
}
mi_assert_internal(nfree + segment->used == segment->capacity);
// mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0
@ -244,19 +247,19 @@ static void mi_page_purge(mi_segment_t* segment, mi_page_t* page, mi_segments_tl
mi_assert_internal(!page->segment_in_use);
if (!segment->allow_purge) return;
mi_assert_internal(page->used == 0);
mi_assert_internal(page->free == NULL);
mi_assert_expensive(!mi_pages_purge_contains(page, tld));
size_t psize;
void* start = mi_segment_raw_page_start(segment, page, &psize);
void* start = mi_segment_raw_page_start(segment, page, &psize);
const bool needs_recommit = _mi_os_purge(start, psize, tld->stats);
if (needs_recommit) { page->is_committed = false; }
page->used = 0;
}
static bool mi_page_ensure_committed(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) {
if (page->is_committed) return true;
mi_assert_internal(segment->allow_decommit);
mi_assert_expensive(!mi_pages_purge_contains(page, tld));
size_t psize;
uint8_t* start = mi_segment_raw_page_start(segment, page, &psize);
bool is_zero = false;
@ -265,9 +268,10 @@ static bool mi_page_ensure_committed(mi_segment_t* segment, mi_page_t* page, mi_
if (!ok) return false; // failed to commit!
page->is_committed = true;
page->used = 0;
page->free = NULL;
page->is_zero_init = is_zero;
if (gsize > 0) {
mi_segment_protect_range(start + psize, gsize, true);
if (gsize > 0) {
mi_segment_protect_range(start + psize, gsize, true);
}
return true;
}
@ -277,18 +281,30 @@ static bool mi_page_ensure_committed(mi_segment_t* segment, mi_page_t* page, mi_
The free page queue
----------------------------------------------------------- */
// we re-use the `used` field for the expiration counter. Since this is a
// a 32-bit field while the clock is always 64-bit we need to guard
// against overflow, we use substraction to check for expiry which work
// we re-use the `free` field for the expiration counter. Since this is a
// pointer-sized field while the clock is always 64-bit we need to guard
// against overflow; we use subtraction to check for expiry, which works
// as long as the reset delay is under (2^30 - 1) milliseconds (~12 days)
static void mi_page_purge_set_expire(mi_page_t* page) {
mi_assert_internal(page->used == 0);
uint32_t expire = (uint32_t)_mi_clock_now() + mi_option_get(mi_option_purge_delay);
page->used = expire;
static uint32_t mi_page_get_expire( mi_page_t* page ) {
return (uint32_t)((uintptr_t)page->free);
}
static void mi_page_set_expire( mi_page_t* page, uint32_t expire ) {
page->free = (mi_block_t*)((uintptr_t)expire);
}
static void mi_page_purge_set_expire(mi_page_t* page) {
mi_assert_internal(mi_page_get_expire(page)==0);
uint32_t expire = (uint32_t)_mi_clock_now() + mi_option_get(mi_option_purge_delay);
mi_page_set_expire(page, expire);
}
// we re-use the `free` field for the expiration counter. Since this is a
// pointer-sized field while the clock is always 64-bit we need to guard
// against overflow; we use subtraction to check for expiry, which works
// as long as the reset delay is under (2^30 - 1) milliseconds (~12 days)
static bool mi_page_purge_is_expired(mi_page_t* page, mi_msecs_t now) {
int32_t expire = (int32_t)(page->used);
int32_t expire = (int32_t)mi_page_get_expire(page);
return (((int32_t)now - expire) >= 0);
}
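
A freestanding sketch of the subtraction-based expiry test above: truncating the 64-bit clock to 32 bits stays correct across wraparound because the signed difference still orders the two timestamps, as long as the purge delay is far below 2^31 milliseconds. Names and values here are illustrative only.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool is_expired(uint32_t expire, uint32_t now) {
  return ((int32_t)(now - expire) >= 0);   // wraparound-safe comparison
}

int main(void) {
  uint32_t now    = 0xFFFFFFF0u;       // clock just before 32-bit wraparound
  uint32_t expire = now + 100u;        // expiry lands after the wrap
  printf("%d\n", is_expired(expire, now));         // 0: not expired yet
  printf("%d\n", is_expired(expire, now + 200u));  // 1: expired, despite the wrap
  return 0;
}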
@ -303,7 +319,7 @@ static void mi_segment_schedule_purge(mi_segment_t* segment, mi_page_t* page, mi
// purge immediately?
mi_page_purge(segment, page, tld);
}
else {
else if (mi_option_get(mi_option_purge_delay) > 0) { // no purging if the delay is negative
// otherwise push on the delayed page reset queue
mi_page_queue_t* pq = &tld->pages_purge;
// push on top
@ -327,14 +343,14 @@ static void mi_page_purge_remove(mi_page_t* page, mi_segments_tld_t* tld) {
mi_page_queue_t* pq = &tld->pages_purge;
mi_assert_internal(pq!=NULL);
mi_assert_internal(!page->segment_in_use);
mi_assert_internal(page->used != 0);
mi_assert_internal(mi_page_get_expire(page) != 0);
mi_assert_internal(mi_pages_purge_contains(page, tld));
if (page->prev != NULL) page->prev->next = page->next;
if (page->next != NULL) page->next->prev = page->prev;
if (page == pq->last) pq->last = page->prev;
if (page == pq->first) pq->first = page->next;
page->next = page->prev = NULL;
page->used = 0;
mi_page_set_expire(page,0);
}
static void mi_segment_remove_all_purges(mi_segment_t* segment, bool force_purge, mi_segments_tld_t* tld) {
@ -343,7 +359,7 @@ static void mi_segment_remove_all_purges(mi_segment_t* segment, bool force_purge
mi_page_t* page = &segment->pages[i];
if (!page->segment_in_use) {
mi_page_purge_remove(page, tld);
if (force_purge) {
if (force_purge && page->is_committed) {
mi_page_purge(segment, page, tld);
}
}
@ -353,14 +369,14 @@ static void mi_segment_remove_all_purges(mi_segment_t* segment, bool force_purge
}
}
static void mi_pages_try_purge(mi_segments_tld_t* tld) {
static void mi_pages_try_purge(bool force, mi_segments_tld_t* tld) {
if (mi_option_get(mi_option_purge_delay) < 0) return; // purging is not allowed
mi_msecs_t now = _mi_clock_now();
mi_page_queue_t* pq = &tld->pages_purge;
// from oldest up to the first that has not expired yet
mi_page_t* page = pq->last;
while (page != NULL && mi_page_purge_is_expired(page,now)) {
while (page != NULL && (force || mi_page_purge_is_expired(page,now))) {
mi_page_t* const prev = page->prev; // save previous field
mi_page_purge_remove(page, tld); // remove from the list to maintain invariant for mi_page_purge
mi_page_purge(_mi_page_segment(page), page, tld);
@ -406,26 +422,24 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_
#endif
if (page_size != NULL) *page_size = psize;
mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page);
mi_assert_internal(page->block_size == 0 || _mi_ptr_page(p) == page);
mi_assert_internal(_mi_ptr_segment(p) == segment);
return p;
}
// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set)
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size)
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size)
{
size_t psize;
uint8_t* p = mi_segment_raw_page_start(segment, page, &psize);
if (pre_size != NULL) *pre_size = 0;
if (page->segment_idx == 0 && block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) {
const size_t block_size = mi_page_block_size(page);
if (/*page->segment_idx == 0 &&*/ block_size > 0 && block_size <= MI_MAX_ALIGN_GUARANTEE) {
// for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore)
mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM);
size_t adjust = block_size - ((uintptr_t)p % block_size);
if (psize - adjust >= block_size) {
if (adjust < block_size) {
p += adjust;
psize -= adjust;
if (pre_size != NULL) *pre_size = adjust;
}
if (adjust < block_size && psize >= block_size + adjust) {
p += adjust;
psize -= adjust;
mi_assert_internal((uintptr_t)p % block_size == 0);
}
}
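// Worked example, not part of the patch: with a hypothetical block_size of 48
// and a raw page start where p % 48 == 16, adjust becomes 48 - 16 = 32, so the
// page start moves forward 32 bytes to the next multiple of 48 (provided psize
// still has room for block_size + adjust). If p is already a multiple of 48
// then adjust == 48, the `adjust < block_size` guard fails, and the page start
// is left unchanged.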
@ -436,6 +450,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa
return p;
}
static size_t mi_segment_calculate_sizes(size_t capacity, size_t required, size_t* pre_size, size_t* info_size)
{
const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */;
@ -480,6 +495,11 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
segment->thread_id = 0;
_mi_segment_map_freed_at(segment);
mi_segments_track_size(-((long)segment_size),tld);
if (segment->was_reclaimed) {
tld->reclaim_count--;
segment->was_reclaimed = false;
}
if (MI_SECURE != 0) {
mi_assert_internal(!segment->memid.is_pinned);
mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set
@ -491,24 +511,24 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
for (size_t i = 0; i < segment->capacity; i++) {
mi_page_t* page = &segment->pages[i];
if (page->is_committed) { committed_size += page_size; }
if (!page->is_committed) { fully_committed = false; }
if (!page->is_committed) { fully_committed = false; }
}
MI_UNUSED(fully_committed);
mi_assert_internal((fully_committed && committed_size == segment_size) || (!fully_committed && committed_size < segment_size));
_mi_abandoned_await_readers(); // prevent ABA issue if concurrent readers try to access our memory (that might be purged)
_mi_arena_free(segment, segment_size, committed_size, segment->memid, tld->stats);
}
// called by threads that are terminating to free cached segments
void _mi_segment_thread_collect(mi_segments_tld_t* tld) {
MI_UNUSED(tld);
#if MI_DEBUG>=2
// called from `heap_collect`.
void _mi_segments_collect(bool force, mi_segments_tld_t* tld) {
mi_pages_try_purge(force,tld);
#if MI_DEBUG>=2
if (!_mi_is_main_thread()) {
mi_assert_internal(tld->pages_purge.first == NULL);
mi_assert_internal(tld->pages_purge.last == NULL);
}
#endif
#endif
}
@ -524,7 +544,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme
bool allow_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy
size_t align_offset = 0;
size_t alignment = MI_SEGMENT_SIZE;
if (page_alignment >= MI_ALIGN_HUGE) {
if (page_alignment >= MI_BLOCK_ALIGNMENT_MAX) {
alignment = page_alignment;
align_offset = _mi_align_up(pre_size, MI_SEGMENT_SIZE);
segment_size = segment_size + (align_offset - pre_size); // adjust the segment size
@ -540,7 +560,7 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme
segment = (mi_segment_t*)_mi_os_alloc_expandable(segment_size, alignment, future_reserve, &memid, tld_os->stats);
}
else {
mi_assert_internal(page_alignment == 0 || page_alignment >= MI_ALIGN_HUGE);
mi_assert_internal(page_alignment == 0 || page_alignment >= MI_BLOCK_ALIGNMENT_MAX);
segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, commit, allow_large, req_arena_id, &memid, tld_os);
}
@ -556,9 +576,9 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme
// commit failed; we cannot touch the memory: free the segment directly and return `NULL`
_mi_arena_free(segment, segment_size, 0, memid, tld_os->stats);
return NULL;
}
}
}
MI_UNUSED(info_size);
segment->memid = memid;
segment->allow_decommit = !memid.is_pinned;
@ -601,25 +621,25 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
tld->peak_count < (size_t)mi_option_get(mi_option_eager_commit_delay));
const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit);
const bool init_commit = eager; // || (page_kind >= MI_PAGE_LARGE);
// Allocate the segment from the OS (segment_size can change due to alignment)
mi_segment_t* segment = mi_segment_os_alloc(eager_delayed, page_alignment, req_arena_id, pre_size, info_size, init_commit, init_segment_size, tld, os_tld);
if (segment == NULL) return NULL;
mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);
mi_assert_internal(segment->memid.is_pinned ? segment->memid.initially_committed : true);
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan
// zero the segment info (but not the `mem` fields)
ptrdiff_t ofs = offsetof(mi_segment_t, next);
_mi_memzero((uint8_t*)segment + ofs, info_size - ofs);
// initialize pages info
const bool is_huge = (page_kind == MI_PAGE_HUGE);
for (size_t i = 0; i < capacity; i++) {
mi_assert_internal(i <= 255);
segment->pages[i].segment_idx = (uint8_t)i;
segment->pages[i].is_committed = segment->memid.initially_committed;
segment->pages[i].is_zero_init = segment->memid.initially_zero;
segment->pages[i].is_huge = is_huge;
}
// initialize
@ -629,7 +649,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
segment->segment_info_size = pre_size;
segment->thread_id = _mi_thread_id();
segment->cookie = _mi_ptr_cookie(segment);
// set protection
mi_segment_protect(segment, true, tld->os);
@ -648,7 +668,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
// don't purge as we are freeing now
mi_segment_remove_all_purges(segment, false /* don't force as we are about to free */, tld);
mi_segment_remove_from_free_queue(segment, tld);
mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment));
mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment));
mi_assert(segment->next == NULL);
@ -675,10 +695,10 @@ static bool mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg
// check commit
if (!mi_page_ensure_committed(segment, page, tld)) return false;
// set in-use before doing unreset to prevent delayed reset
page->segment_in_use = true;
segment->used++;
segment->used++;
mi_assert_internal(page->segment_in_use && page->is_committed && page->used==0 && !mi_pages_purge_contains(page,tld));
mi_assert_internal(segment->used <= segment->capacity);
if (segment->used == segment->capacity && segment->page_kind <= MI_PAGE_MEDIUM) {
@ -711,15 +731,19 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg
page->is_zero_init = false;
page->segment_in_use = false;
// zero the page data, but not the segment fields and capacity, and block_size (for page size calculations)
uint32_t xblock_size = page->xblock_size;
// zero the page data, but not the segment fields and capacity, page start, and block_size (for page size calculations)
size_t block_size = page->block_size;
uint8_t block_size_shift = page->block_size_shift;
uint8_t* page_start = page->page_start;
uint16_t capacity = page->capacity;
uint16_t reserved = page->reserved;
ptrdiff_t ofs = offsetof(mi_page_t,capacity);
_mi_memzero((uint8_t*)page + ofs, sizeof(*page) - ofs);
page->capacity = capacity;
page->reserved = reserved;
page->xblock_size = xblock_size;
page->block_size = block_size;
page->block_size_shift = block_size_shift;
page->page_start = page_start;
segment->used--;
// schedule purge
@ -734,7 +758,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
mi_assert(page != NULL);
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_expensive(mi_segment_is_valid(segment,tld));
mi_pages_try_purge(tld);
mi_pages_try_purge(false /*force?*/, tld);
// mark it as free now
mi_segment_page_clear(segment, page, tld);
@ -749,9 +773,11 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
mi_segment_abandon(segment,tld);
}
else if (segment->used + 1 == segment->capacity) {
mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); // for now we only support small and medium pages
// move back to segments free list
mi_segment_insert_in_free_queue(segment,tld);
mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); // large and huge pages are always the single page in a segment
if (segment->page_kind <= MI_PAGE_MEDIUM) {
// move back to segments free list
mi_segment_insert_in_free_queue(segment,tld);
}
}
}
}
@ -763,171 +789,21 @@ Abandonment
When threads terminate, they can leave segments with
live blocks (reached through other threads). Such segments
are "abandoned" and will be reclaimed by other threads to
reuse their pages and/or free them eventually
reuse their pages and/or free them eventually. The
`thread_id` of such segments is 0.
We maintain a global list of abandoned segments that are
reclaimed on demand. Since this is shared among threads
the implementation needs to avoid the A-B-A problem on
popping abandoned segments: <https://en.wikipedia.org/wiki/ABA_problem>
We use tagged pointers to avoid accidentally identifying
reused segments, much like stamped references in Java.
Secondly, we maintain a reader counter to avoid resetting
or decommitting segments that have a pending read operation.
When a block is freed in an abandoned segment, the segment
is reclaimed by the freeing thread.
Note: the current implementation is one possible design;
another way might be to keep track of abandoned segments
in the regions. This would have the advantage of keeping
all concurrent code in one place and not needing to deal
with ABA issues. The drawback is that it is unclear how to
scan abandoned segments efficiently in that case as they
would be spread among all other segments in the regions.
Moreover, if threads are looking for a fresh segment, they
will first consider abandoned segments -- these can be found
by scanning the arena memory
(segments outside arena memory are only reclaimed by a free).
----------------------------------------------------------- */
// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
// to put in a tag that increments on update to avoid the A-B-A problem.
#define MI_TAGGED_MASK MI_SEGMENT_MASK
typedef uintptr_t mi_tagged_segment_t;
static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) {
return (mi_segment_t*)(ts & ~MI_TAGGED_MASK);
}
static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) {
mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0);
uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK;
return ((uintptr_t)segment | tag);
}
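// Illustration only, not part of the patch: why the tag matters in
// `mi_abandoned_pop` below. Suppose thread A loads the head as (S1 | tag 7)
// and computes the replacement (S1->abandoned_next | tag 8). If, before A's
// CAS, thread B pops S1, pops S2, and pushes S1 back, the head points to S1
// again but its tag has advanced to 10. A's CAS compares the whole tagged
// word, sees 7 != 10, and retries instead of installing a stale next pointer
// -- the A-B-A case a plain pointer compare-and-swap would miss.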
// This is a list of visited abandoned pages that were full at the time.
// this list migrates to `abandoned` when that becomes NULL. The use of
// this list reduces contention and the rate at which segments are visited.
static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL
// The abandoned page list (tagged as it supports pop)
static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL
// Maintain these for debug purposes (these counts may be a bit off)
static mi_decl_cache_align _Atomic(size_t) abandoned_count;
static mi_decl_cache_align _Atomic(size_t) abandoned_visited_count;
// We also maintain a count of current readers of the abandoned list
// in order to prevent resetting/decommitting segment memory if it might
// still be read.
static mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0
// Push on the visited list
static void mi_abandoned_visited_push(mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL);
mi_assert_internal(segment->next == NULL && segment->prev == NULL);
mi_assert_internal(segment->used > 0);
mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited);
do {
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext);
} while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment));
mi_atomic_increment_relaxed(&abandoned_visited_count);
}
// Move the visited list to the abandoned list.
static bool mi_abandoned_visited_revisit(void)
{
// quick check if the visited list is empty
if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false;
// grab the whole visited list
mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL);
if (first == NULL) return false;
// first try to swap directly if the abandoned list happens to be NULL
mi_tagged_segment_t afirst;
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
if (mi_tagged_segment_ptr(ts)==NULL) {
size_t count = mi_atomic_load_relaxed(&abandoned_visited_count);
afirst = mi_tagged_segment(first, ts);
if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) {
mi_atomic_add_relaxed(&abandoned_count, count);
mi_atomic_sub_relaxed(&abandoned_visited_count, count);
return true;
}
}
// find the last element of the visited list: O(n)
mi_segment_t* last = first;
mi_segment_t* next;
while ((next = mi_atomic_load_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) {
last = next;
}
// and atomically prepend to the abandoned list
// (no need to increase the readers as we don't access the abandoned segments)
mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned);
size_t count;
do {
count = mi_atomic_load_relaxed(&abandoned_visited_count);
mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext));
afirst = mi_tagged_segment(first, anext);
} while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst));
mi_atomic_add_relaxed(&abandoned_count, count);
mi_atomic_sub_relaxed(&abandoned_visited_count, count);
return true;
}
// Push on the abandoned list.
static void mi_abandoned_push(mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
mi_assert_internal(segment->next == NULL && segment->prev == NULL);
mi_assert_internal(segment->used > 0);
mi_tagged_segment_t next;
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
do {
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts));
next = mi_tagged_segment(segment, ts);
} while (!mi_atomic_cas_weak_release(&abandoned, &ts, next));
mi_atomic_increment_relaxed(&abandoned_count);
}
// Wait until there are no more pending reads on segments that used to be in the abandoned list
// legacy: Wait until there are no more pending reads on segments that used to be in the abandoned list
void _mi_abandoned_await_readers(void) {
size_t n;
do {
n = mi_atomic_load_acquire(&abandoned_readers);
if (n != 0) mi_atomic_yield();
} while (n != 0);
}
// Pop from the abandoned list
static mi_segment_t* mi_abandoned_pop(void) {
mi_segment_t* segment;
// Check efficiently if it is empty (or if the visited list needs to be moved)
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
segment = mi_tagged_segment_ptr(ts);
if mi_likely(segment == NULL) {
if mi_likely(!mi_abandoned_visited_revisit()) { // try to swap in the visited list on NULL
return NULL;
}
}
// Do a pop. We use a reader count to prevent
// a segment from being decommitted while a read is still pending,
// and a tagged pointer to prevent A-B-A link corruption.
// (this is called from `region.c:_mi_mem_free` for example)
mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted
mi_tagged_segment_t next = 0;
ts = mi_atomic_load_acquire(&abandoned);
do {
segment = mi_tagged_segment_ptr(ts);
if (segment != NULL) {
mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next);
next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted
}
} while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next));
mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock
if (segment != NULL) {
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
mi_atomic_decrement_relaxed(&abandoned_count);
}
return segment;
// nothing needed
}
/* -----------------------------------------------------------
@ -937,22 +813,27 @@ static mi_segment_t* mi_abandoned_pop(void) {
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(segment->used == segment->abandoned);
mi_assert_internal(segment->used > 0);
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
mi_assert_expensive(mi_segment_is_valid(segment, tld));
// Potentially force purge. Only abandoned segments in arena memory can be
// reclaimed without a free, so if a segment is not from an arena we force purge here to be conservative.
mi_pages_try_purge(false /*force?*/,tld);
const bool force_purge = (segment->memid.memkind != MI_MEM_ARENA) || mi_option_is_enabled(mi_option_abandoned_page_purge);
mi_segment_remove_all_purges(segment, force_purge, tld);
// remove the segment from the free page queue if needed
mi_pages_try_purge(tld);
mi_segment_remove_all_purges(segment, mi_option_is_enabled(mi_option_abandoned_page_purge), tld);
mi_segment_remove_from_free_queue(segment, tld);
mi_assert_internal(segment->next == NULL && segment->prev == NULL);
// all pages in the segment are abandoned; add it to the abandoned list
_mi_stat_increase(&tld->stats->segments_abandoned, 1);
mi_segments_track_size(-((long)segment->segment_size), tld);
segment->thread_id = 0;
segment->abandoned_visits = 0;
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
mi_abandoned_push(segment);
if (segment->was_reclaimed) {
tld->reclaim_count--;
segment->was_reclaimed = false;
}
_mi_arena_segment_mark_abandoned(segment);
}
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@ -978,7 +859,6 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
// Possibly clear pages and check if free space is available
static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool* all_pages_free)
{
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
bool has_page = false;
size_t pages_used = 0;
size_t pages_used_empty = 0;
@ -994,7 +874,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool
pages_used_empty++;
has_page = true;
}
else if (page->xblock_size == block_size && mi_page_has_any_available(page)) {
else if (mi_page_block_size(page) == block_size && mi_page_has_any_available(page)) {
// a page has available free blocks of the right size
has_page = true;
}
@ -1015,11 +895,13 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool
// Reclaim a segment; returns NULL if the segment was freed
// set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full.
static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) {
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; }
segment->thread_id = _mi_thread_id();
// can be 0 still with abandoned_next, or already a thread id for segments outside an arena that are reclaimed on a free.
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0 || mi_atomic_load_relaxed(&segment->thread_id) == _mi_thread_id());
mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
segment->abandoned_visits = 0;
segment->was_reclaimed = true;
tld->reclaim_count++;
mi_segments_track_size((long)segment->segment_size, tld);
mi_assert_internal(segment->next == NULL && segment->prev == NULL);
mi_assert_expensive(mi_segment_is_valid(segment, tld));
@ -1038,7 +920,6 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
// set the heap again and allow heap thread delayed free again.
mi_page_set_heap(page, heap);
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
// TODO: should we not collect again given that we just collected in `check_free`?
_mi_page_free_collect(page, false); // ensure used count is up to date
if (mi_page_all_free(page)) {
// if everything free already, clear the page directly
@ -1047,7 +928,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
else {
// otherwise reclaim it into the heap
_mi_page_reclaim(heap, page);
if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) {
if (requested_block_size == mi_page_block_size(page) && mi_page_has_any_available(page)) {
if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; }
}
}
@ -1073,23 +954,54 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
}
}
// attempt to reclaim a particular segment (called from multi threaded free `alloc.c:mi_free_block_mt`)
bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) {
if (mi_atomic_load_relaxed(&segment->thread_id) != 0) return false; // it is not abandoned
// don't reclaim more from a free than half the current segments
// this is to prevent a pure free-ing thread from ending up owning too many segments
if (heap->tld->segments.reclaim_count * 2 > heap->tld->segments.count) return false;
if (_mi_arena_segment_clear_abandoned(segment)) { // atomically unabandon
mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, NULL, &heap->tld->segments);
mi_assert_internal(res == segment);
return (res != NULL);
}
return false;
}
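// Worked example, not part of the patch: with the hypothetical counts
// reclaim_count == 5 and count == 8, the guard 5*2 > 8 holds and the reclaim
// on free is skipped, so a thread that mostly frees does not keep accumulating
// other threads' segments; once the thread owns at least 10 segments of its
// own, the same reclaim_count passes the check again.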
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
mi_segment_t* segment;
while ((segment = mi_abandoned_pop()) != NULL) {
mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, &current);
while ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL) {
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
}
static long mi_segment_get_reclaim_tries(void) {
// limit the tries to 10% (default) of the abandoned segments with at least 8 and at most 1024 tries.
const size_t perc = (size_t)mi_option_get_clamp(mi_option_max_segment_reclaim, 0, 100);
if (perc <= 0) return 0;
const size_t total_count = _mi_arena_segment_abandoned_count();
if (total_count == 0) return 0;
const size_t relative_count = (total_count > 10000 ? (total_count / 100) * perc : (total_count * perc) / 100); // avoid overflow
long max_tries = (long)(relative_count <= 1 ? 1 : (relative_count > 1024 ? 1024 : relative_count));
if (max_tries < 8 && total_count > 8) { max_tries = 8; }
return max_tries;
}
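// Worked example, not part of the patch: with the default 10% and a
// hypothetical 500 abandoned segments this yields 50 tries; with only 30
// abandoned segments the 10% share (3) is raised to the minimum of 8; and with
// 50,000 abandoned segments the 10% share (5,000) is capped at 1024, so a
// single allocation never scans the abandoned set unboundedly.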
static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, bool* reclaimed, mi_segments_tld_t* tld)
{
*reclaimed = false;
long max_tries = mi_segment_get_reclaim_tries();
if (max_tries <= 0) return NULL;
mi_segment_t* segment;
long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times
while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, &current);
while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL))
{
segment->abandoned_visits++;
// todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments
// and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way?
// todo: should we respect numa affinity for abandoned reclaim? perhaps only for the first visit?
// todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments and use many tries
// Perhaps we can skip non-suitable ones in a better way?
bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid);
bool all_pages_free;
bool has_page = mi_segment_check_free(segment,block_size,&all_pages_free); // try to free up pages (due to concurrent frees)
@ -1112,9 +1024,9 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size,
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else {
// otherwise, push on the visited list so it does not get looked at again too soon
// otherwise, mark it back as abandoned
// todo: reset delayed pages in the segment?
mi_abandoned_visited_push(segment);
_mi_arena_segment_mark_abandoned(segment);
}
}
return NULL;
@ -1128,7 +1040,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size,
static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(page_kind <= MI_PAGE_LARGE);
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX);
// 1. try to reclaim an abandoned segment
bool reclaimed;
@ -1154,7 +1066,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s
static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(mi_segment_has_free(segment));
mi_assert_expensive(mi_segment_is_valid(segment, tld));
mi_assert_expensive(mi_segment_is_valid(segment, tld));
for (size_t i = 0; i < segment->capacity; i++) { // TODO: use a bitmap instead of search?
mi_page_t* page = &segment->pages[i];
if (!page->segment_in_use) {
@ -1197,7 +1109,7 @@ static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_p
mi_assert_internal(page != NULL);
#if MI_DEBUG>=2 && !MI_TRACK_ENABLED // && !MI_TSAN
// verify it is committed
_mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0;
mi_segment_raw_page_start(_mi_page_segment(page), page, NULL)[0] = 0;
#endif
return page;
}
@ -1220,7 +1132,7 @@ static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size
mi_page_t* page = mi_segment_find_free(segment, tld);
mi_assert_internal(page != NULL);
#if MI_DEBUG>=2 && !MI_TRACK_ENABLED // && !MI_TSAN
_mi_segment_page_start(segment, page, sizeof(void*), NULL, NULL)[0] = 0;
mi_segment_raw_page_start(segment, page, NULL)[0] = 0;
#endif
return page;
}
@ -1236,15 +1148,16 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment,
#endif
mi_page_t* page = mi_segment_find_free(segment, tld);
mi_assert_internal(page != NULL);
mi_assert_internal(page->is_huge);
// for huge pages we initialize the xblock_size as we may
// for huge pages we initialize the block_size as we may
// overallocate to accommodate large alignments.
size_t psize;
uint8_t* start = _mi_segment_page_start(segment, page, 0, &psize, NULL);
page->xblock_size = MI_HUGE_BLOCK_SIZE;
uint8_t* start = mi_segment_raw_page_start(segment, page, &psize);
page->block_size = psize;
// reset the part of the page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE)
if (page_alignment >= MI_ALIGN_HUGE && segment->allow_decommit && page->is_committed) {
if (page_alignment >= MI_BLOCK_ALIGNMENT_MAX && segment->allow_decommit && page->is_committed) {
uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment);
mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment));
mi_assert_internal(psize - (aligned_p - start) >= size);
@ -1272,7 +1185,7 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block
mi_block_set_next(page, block, page->free);
page->free = block;
page->used--;
page->is_zero = false;
page->is_zero_init = false;
mi_assert(page->used == 0);
mi_tld_t* tld = heap->tld;
mi_segments_track_size((long)segment->segment_size, &tld->segments);
@ -1294,7 +1207,7 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc
mi_assert_internal(page->free == NULL);
if (segment->allow_decommit && page->is_committed) {
size_t usize = mi_usable_size(block);
if (usize > sizeof(mi_block_t)) {
if (usize > sizeof(mi_block_t)) {
usize = usize - sizeof(mi_block_t);
uint8_t* p = (uint8_t*)block + sizeof(mi_block_t);
_mi_os_reset(p, usize, &_mi_stats_main);
@ -1318,9 +1231,9 @@ mi_block_t* _mi_segment_huge_page_expand(mi_segment_t* segment, mi_page_t* page,
// adjust segment and page size
segment->segment_size = newssize;
size_t psize = 0;
_mi_segment_page_start(segment, page, 0, &psize, NULL);
_mi_segment_page_start(segment, page, &psize);
mi_assert_internal(psize >= newsize);
page->xblock_size = MI_HUGE_BLOCK_SIZE;
page->block_size = psize;
return block;
}
@ -1367,9 +1280,9 @@ mi_block_t* _mi_segment_huge_page_remap(mi_segment_t* segment, mi_page_t* page,
mi_page_t* newpage = _mi_ptr_page(newblock);
size_t psize = 0;
_mi_segment_page_start(newsegment, newpage, 0, &psize, NULL);
_mi_segment_page_start(newsegment, newpage, &psize);
mi_assert_internal(psize >= newsize);
newpage->xblock_size = MI_HUGE_BLOCK_SIZE;
newpage->block_size = psize;
mi_assert_internal(mi_page_block_size(newpage) >= newsize);
_mi_heap_huge_page_attach(heap, newpage);
@ -1383,8 +1296,8 @@ mi_block_t* _mi_segment_huge_page_remap(mi_segment_t* segment, mi_page_t* page,
mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
mi_page_t* page;
if mi_unlikely(page_alignment > 0) {
mi_assert_internal(page_alignment <= MI_ALIGN_REMAP || (_mi_is_power_of_two(page_alignment) && page_alignment >= MI_ALIGN_HUGE));
if (page_alignment >= MI_ALIGN_HUGE && page_alignment < MI_SEGMENT_SIZE) { page_alignment = MI_SEGMENT_SIZE; }
mi_assert_internal(page_alignment <= MI_ALIGN_REMAP || (_mi_is_power_of_two(page_alignment) && page_alignment >= MI_BLOCK_ALIGNMENT_MAX));
if (page_alignment >= MI_BLOCK_ALIGNMENT_MAX && page_alignment < MI_SEGMENT_SIZE) { page_alignment = MI_SEGMENT_SIZE; }
page = mi_segment_huge_page_alloc(block_size, page_alignment, heap->arena_id, tld, os_tld);
}
else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) {
@ -1397,7 +1310,8 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag
page = mi_segment_large_page_alloc(heap, block_size, tld, os_tld);
}
else {
page = mi_segment_huge_page_alloc(block_size, page_alignment, heap->arena_id, tld, os_tld);
mi_assert_internal(page_alignment==0);
page = mi_segment_huge_page_alloc(block_size, 0 /*page_alignment*/, heap->arena_id, tld, os_tld);
}
mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size);

View file

@ -117,8 +117,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1);
mi_stat_counter_add(&stats->searches, &src->searches, 1);
mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1);
mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1);
mi_stat_counter_add(&stats->giant_count, &src->giant_count, 1);
mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1);
#if MI_STAT>1
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
if (src->normal_bins[i].allocated > 0 || src->normal_bins[i].freed > 0) {
@ -175,13 +174,28 @@ static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* ar
static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg, const char* notok ) {
_mi_fprintf(out, arg,"%10s:", msg);
if (unit > 0) {
mi_print_amount(stat->peak, unit, out, arg);
mi_print_amount(stat->allocated, unit, out, arg);
mi_print_amount(stat->freed, unit, out, arg);
mi_print_amount(stat->current, unit, out, arg);
mi_print_amount(unit, 1, out, arg);
mi_print_count(stat->allocated, unit, out, arg);
if (unit != 0) {
if (unit > 0) {
mi_print_amount(stat->peak, unit, out, arg);
mi_print_amount(stat->allocated, unit, out, arg);
mi_print_amount(stat->freed, unit, out, arg);
mi_print_amount(stat->current, unit, out, arg);
mi_print_amount(unit, 1, out, arg);
mi_print_count(stat->allocated, unit, out, arg);
}
else {
mi_print_amount(stat->peak, -1, out, arg);
mi_print_amount(stat->allocated, -1, out, arg);
mi_print_amount(stat->freed, -1, out, arg);
mi_print_amount(stat->current, -1, out, arg);
if (unit == -1) {
_mi_fprintf(out, arg, "%24s", "");
}
else {
mi_print_amount(-unit, 1, out, arg);
mi_print_count((stat->allocated / -unit), 0, out, arg);
}
}
if (stat->allocated > stat->freed) {
_mi_fprintf(out, arg, " ");
_mi_fprintf(out, arg, (notok == NULL ? "not all freed" : notok));
@ -191,23 +205,6 @@ static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64
_mi_fprintf(out, arg, " ok\n");
}
}
else if (unit<0) {
mi_print_amount(stat->peak, -1, out, arg);
mi_print_amount(stat->allocated, -1, out, arg);
mi_print_amount(stat->freed, -1, out, arg);
mi_print_amount(stat->current, -1, out, arg);
if (unit==-1) {
_mi_fprintf(out, arg, "%24s", "");
}
else {
mi_print_amount(-unit, 1, out, arg);
mi_print_count((stat->allocated / -unit), 0, out, arg);
}
if (stat->allocated > stat->freed)
_mi_fprintf(out, arg, " not all freed!\n");
else
_mi_fprintf(out, arg, " ok\n");
}
else {
mi_print_amount(stat->peak, 1, out, arg);
mi_print_amount(stat->allocated, 1, out, arg);
@ -316,12 +313,10 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
#endif
#if MI_STAT
mi_stat_print(&stats->normal, "normal", (stats->normal_count.count == 0 ? 1 : -(stats->normal.allocated / stats->normal_count.count)), out, arg);
mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg);
mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out, arg);
mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg);
mi_stat_count_t total = { 0,0,0,0 };
mi_stat_add(&total, &stats->normal, 1);
mi_stat_add(&total, &stats->huge, 1);
mi_stat_add(&total, &stats->giant, 1);
mi_stat_print(&total, "total", 1, out, arg);
#endif
#if MI_STAT>1
@ -340,6 +335,9 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg);
mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg);
mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg);
mi_stat_counter_print(&stats->arena_count, "arenas", out, arg);
mi_stat_counter_print(&stats->arena_crossover_count, "-crossover", out, arg);
mi_stat_counter_print(&stats->arena_rollback_count, "-rollback", out, arg);
mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg);
mi_stat_counter_print(&stats->commit_calls, "commits", out, arg);
mi_stat_counter_print(&stats->reset_calls, "resets", out, arg);

View file

@ -100,7 +100,7 @@ static void various_tests() {
auto tbuf = new unsigned char[sizeof(Test)];
t = new (tbuf) Test(42);
t->~Test();
delete tbuf;
delete[] tbuf;
}
class Static {

View file

@ -35,7 +35,7 @@ we therefore test the API over various inputs. Please add more tests :-)
#include "mimalloc.h"
// #include "mimalloc/internal.h"
#include "mimalloc/types.h" // for MI_DEBUG and MI_ALIGNMENT_MAX
#include "mimalloc/types.h" // for MI_DEBUG and MI_BLOCK_ALIGNMENT_MAX
#include "testhelper.h"
@ -47,6 +47,11 @@ bool test_heap2(void);
bool test_stl_allocator1(void);
bool test_stl_allocator2(void);
bool test_stl_heap_allocator1(void);
bool test_stl_heap_allocator2(void);
bool test_stl_heap_allocator3(void);
bool test_stl_heap_allocator4(void);
bool mem_is_zero(uint8_t* p, size_t size) {
if (p==NULL) return false;
for (size_t i = 0; i < size; ++i) {
@ -156,7 +161,7 @@ int main(void) {
};
CHECK_BODY("malloc-aligned6") {
bool ok = true;
for (size_t align = 1; align <= MI_ALIGN_HUGE && ok; align *= 2) {
for (size_t align = 1; align <= MI_BLOCK_ALIGNMENT_MAX && ok; align *= 2) {
void* ps[8];
for (int i = 0; i < 8 && ok; i++) {
ps[i] = mi_malloc_aligned(align*13 // size
@ -172,16 +177,16 @@ int main(void) {
result = ok;
};
CHECK_BODY("malloc-aligned7") {
void* p = mi_malloc_aligned(1024,MI_ALIGN_HUGE);
void* p = mi_malloc_aligned(1024,MI_BLOCK_ALIGNMENT_MAX);
mi_free(p);
result = ((uintptr_t)p % MI_ALIGN_HUGE) == 0;
result = ((uintptr_t)p % MI_BLOCK_ALIGNMENT_MAX) == 0;
};
CHECK_BODY("malloc-aligned8") {
bool ok = true;
for (int i = 0; i < 5 && ok; i++) {
int n = (1 << i);
void* p = mi_malloc_aligned(1024, n * MI_ALIGN_HUGE);
ok = ((uintptr_t)p % (n*MI_ALIGN_HUGE)) == 0;
void* p = mi_malloc_aligned(1024, n * MI_BLOCK_ALIGNMENT_MAX);
ok = ((uintptr_t)p % (n*MI_BLOCK_ALIGNMENT_MAX)) == 0;
mi_free(p);
}
result = ok;
@ -189,7 +194,7 @@ int main(void) {
CHECK_BODY("malloc-aligned9") {
bool ok = true;
void* p[8];
size_t sizes[8] = { 8, 512, 1024 * 1024, MI_ALIGN_HUGE, MI_ALIGN_HUGE + 1, 2 * MI_ALIGN_HUGE, 8 * MI_ALIGN_HUGE, 0 };
size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, 0 };
for (int i = 0; i < 28 && ok; i++) {
int align = (1 << i);
for (int j = 0; j < 8 && ok; j++) {
@ -227,6 +232,28 @@ int main(void) {
result = (((uintptr_t)p % 0x100) == 0); // #602
mi_free(p);
}
CHECK_BODY("mimalloc-aligned13") {
bool ok = true;
for( size_t size = 1; size <= (MI_SMALL_SIZE_MAX * 2) && ok; size++ ) {
for(size_t align = 1; align <= size && ok; align *= 2 ) {
void* p[10];
for(int i = 0; i < 10 && ok; i++) {
p[i] = mi_malloc_aligned(size,align);
ok = (p[i] != NULL && ((uintptr_t)(p[i]) % align) == 0);
}
for(int i = 0; i < 10 && ok; i++) {
mi_free(p[i]);
}
/*
if (ok && align <= size && ((size + MI_PADDING_SIZE) & (align-1)) == 0) {
size_t bsize = mi_good_size(size);
ok = (align <= bsize && (bsize & (align-1)) == 0);
}
*/
}
}
result = ok;
}
CHECK_BODY("malloc-aligned-at1") {
void* p = mi_malloc_aligned_at(48,32,0); result = (p != NULL && ((uintptr_t)(p) + 0) % 32 == 0); mi_free(p);
};
@ -306,15 +333,22 @@ int main(void) {
// ---------------------------------------------------
// various
// ---------------------------------------------------
#if !defined(MI_TRACK_ASAN) // realpath may leak with ASAN enabled (as the ASAN allocator intercepts it)
CHECK_BODY("realpath") {
char* s = mi_realpath( ".", NULL );
// printf("realpath: %s\n",s);
mi_free(s);
};
#endif
CHECK("stl_allocator1", test_stl_allocator1());
CHECK("stl_allocator2", test_stl_allocator2());
CHECK("stl_heap_allocator1", test_stl_heap_allocator1());
CHECK("stl_heap_allocator2", test_stl_heap_allocator2());
CHECK("stl_heap_allocator3", test_stl_heap_allocator3());
CHECK("stl_heap_allocator4", test_stl_heap_allocator4());
// ---------------------------------------------------
// Done
// ---------------------------------------------------
@ -368,3 +402,61 @@ bool test_stl_allocator2(void) {
return true;
#endif
}
bool test_stl_heap_allocator1(void) {
#ifdef __cplusplus
std::vector<some_struct, mi_heap_stl_allocator<some_struct> > vec;
vec.push_back(some_struct());
vec.pop_back();
return vec.size() == 0;
#else
return true;
#endif
}
bool test_stl_heap_allocator2(void) {
#ifdef __cplusplus
std::vector<some_struct, mi_heap_destroy_stl_allocator<some_struct> > vec;
vec.push_back(some_struct());
vec.pop_back();
return vec.size() == 0;
#else
return true;
#endif
}
bool test_stl_heap_allocator3(void) {
#ifdef __cplusplus
mi_heap_t* heap = mi_heap_new();
bool good = false;
{
mi_heap_stl_allocator<some_struct> myAlloc(heap);
std::vector<some_struct, mi_heap_stl_allocator<some_struct> > vec(myAlloc);
vec.push_back(some_struct());
vec.pop_back();
good = vec.size() == 0;
}
mi_heap_delete(heap);
return good;
#else
return true;
#endif
}
bool test_stl_heap_allocator4(void) {
#ifdef __cplusplus
mi_heap_t* heap = mi_heap_new();
bool good = false;
{
mi_heap_destroy_stl_allocator<some_struct> myAlloc(heap);
std::vector<some_struct, mi_heap_destroy_stl_allocator<some_struct> > vec(myAlloc);
vec.push_back(some_struct());
vec.pop_back();
good = vec.size() == 0;
}
mi_heap_destroy(heap);
return good;
#else
return true;
#endif
}

View file

@ -37,11 +37,12 @@ static int ITER = 50; // N full iterations destructing and re-creating a
// static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int SCALE = 100; // scaling factor
#define STRESS // undefine for leak test
#define STRESS // undefine for leak test
static bool allow_large_objects = true; // allow very large objects? (set to `true` if SCALE>100)
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
static bool main_participates = false; // main thread participates as a worker too
// #define USE_STD_MALLOC
#ifdef USE_STD_MALLOC
@ -196,10 +197,13 @@ static void test_stress(void) {
free_items(p);
}
}
// mi_collect(false);
#if !defined(NDEBUG) || defined(MI_TSAN)
#ifndef NDEBUG
//mi_collect(false);
//mi_debug_show_arenas();
#endif
#if !defined(NDEBUG) || defined(MI_TSAN)
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
#endif
}
}
@ -267,7 +271,8 @@ int main(int argc, char** argv) {
#ifndef USE_STD_MALLOC
#ifndef NDEBUG
mi_collect(true);
// mi_collect(true);
mi_debug_show_arenas(true,true,true);
#endif
mi_stats_print(NULL);
#endif
@ -291,13 +296,15 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
thread_entry_fun = fun;
DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD));
HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE));
for (uintptr_t i = 0; i < nthreads; i++) {
const size_t start = (main_participates ? 1 : 0);
for (size_t i = start; i < nthreads; i++) {
thandles[i] = CreateThread(0, 8*1024, &thread_entry, (void*)(i), 0, &tids[i]);
}
for (size_t i = 0; i < nthreads; i++) {
if (main_participates) fun(0); // run the main thread as well
for (size_t i = start; i < nthreads; i++) {
WaitForSingleObject(thandles[i], INFINITE);
}
for (size_t i = 0; i < nthreads; i++) {
for (size_t i = start; i < nthreads; i++) {
CloseHandle(thandles[i]);
}
custom_free(tids);
@ -324,11 +331,13 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
thread_entry_fun = fun;
pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t));
memset(threads, 0, sizeof(pthread_t) * nthreads);
const size_t start = (main_participates ? 1 : 0);
//pthread_setconcurrency(nthreads);
for (size_t i = 0; i < nthreads; i++) {
for (size_t i = start; i < nthreads; i++) {
pthread_create(&threads[i], NULL, &thread_entry, (void*)i);
}
for (size_t i = 0; i < nthreads; i++) {
if (main_participates) fun(0); // run the main thread as well
for (size_t i = start; i < nthreads; i++) {
pthread_join(threads[i], NULL);
}
custom_free(threads);