diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ce084f6..0d780fa1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,7 +126,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENE set(MI_ARCH "x64") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) set(MI_ARCH "arm64") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567].?|ARM)$") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$") set(MI_ARCH "arm32") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$") if(CMAKE_SIZEOF_VOID_P==4) @@ -173,8 +173,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel") list(APPEND mi_cflags -Wall) endif() -# force C++ compilation with msvc or clang-cl to use modern C++ atomics -if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel" OR MI_CLANG_CL) +if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") set(MI_USE_CXX "ON") endif() @@ -435,7 +434,7 @@ endif() if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku") if(MI_OPT_ARCH) - if(APPLE AND CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999) + if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999) if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a") endif() @@ -533,9 +532,7 @@ if(MI_TRACK_ASAN) endif() string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC) list(APPEND mi_defines "MI_CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE_LC}") #todo: multi-config project needs $ ? -if(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$") - list(APPEND mi_defines MI_BUILD_RELEASE) -else() +if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$")) set(mi_libname "${mi_libname}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. -debug) if not a release version endif() @@ -585,7 +582,7 @@ if(MI_BUILD_SHARED) install(TARGETS mimalloc EXPORT mimalloc ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir}) - if(WIN32 AND NOT MINGW) + if(WIN32) # On windows, the import library name for the dll would clash with the static mimalloc.lib library # so we postfix the dll import library with `.dll.lib` (and also the .pdb debug file) set_property(TARGET mimalloc PROPERTY ARCHIVE_OUTPUT_NAME "${mi_libname}.dll" ) @@ -595,9 +592,6 @@ if(MI_BUILD_SHARED) # install(FILES "$/${mi_libname}.dll.pdb" DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() if(WIN32 AND MI_WIN_REDIRECT) - if(MINGW) - set_property(TARGET mimalloc PROPERTY PREFIX "") - endif() # On windows, link and copy the mimalloc redirection dll too. 
if(CMAKE_GENERATOR_PLATFORM STREQUAL "arm64ec") set(MIMALLOC_REDIRECT_SUFFIX "-arm64ec") @@ -713,12 +707,10 @@ if (MI_BUILD_TESTS) target_compile_definitions(mimalloc-test-${TEST_NAME} PRIVATE ${mi_defines}) target_compile_options(mimalloc-test-${TEST_NAME} PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-${TEST_NAME} PRIVATE include) - if(MI_BUILD_STATIC AND NOT MI_DEBUG_TSAN) - target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc-static ${mi_libraries}) - elseif(MI_BUILD_SHARED) + if(MI_BUILD_SHARED AND (MI_TRACK_ASAN OR MI_DEBUG_TSAN OR MI_DEBUG_UBSAN)) target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc ${mi_libraries}) else() - message(STATUS "cannot build TSAN tests without MI_BUILD_SHARED being enabled") + target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc-static ${mi_libraries}) endif() add_test(NAME test-${TEST_NAME} COMMAND mimalloc-test-${TEST_NAME}) endforeach() @@ -727,19 +719,21 @@ if (MI_BUILD_TESTS) if(MI_BUILD_SHARED AND NOT (MI_TRACK_ASAN OR MI_DEBUG_TSAN OR MI_DEBUG_UBSAN)) add_executable(mimalloc-test-stress-dynamic test/test-stress.c) target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE ${mi_defines} "USE_STD_MALLOC=1") + if(WIN32) + target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE "MI_LINK_VERSION=1") + endif() target_compile_options(mimalloc-test-stress-dynamic PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-stress-dynamic PRIVATE include) + target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # mi_version if(WIN32) - target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE "MI_LINK_VERSION=1") # link mi_version - target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # link mi_version - add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 $) + add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_SHOW_STATS=1 $) else() if(APPLE) set(LD_PRELOAD "DYLD_INSERT_LIBRARIES") else() set(LD_PRELOAD "LD_PRELOAD") endif() - add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 ${LD_PRELOAD}=$ $) + add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_SHOW_STATS=1 ${LD_PRELOAD}=$ $) endif() endif() endif() diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 83d6a482..a803cd15 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,8 +6,10 @@ trigger: branches: include: - - main - - dev* + - master + - dev + - dev2 + - dev3 tags: include: - v* @@ -32,22 +34,6 @@ jobs: BuildType: secure cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON MSBuildConfiguration: Release - Debug x86: - BuildType: debug - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -A Win32 - MSBuildConfiguration: Debug - Release x86: - BuildType: release - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -A Win32 - MSBuildConfiguration: Release - Debug Fixed TLS: - BuildType: debug - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_WIN_USE_FIXED_TLS=ON - MSBuildConfiguration: Debug - Release Fixed TLS: - BuildType: release - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_WIN_USE_FIXED_TLS=ON - MSBuildConfiguration: Release steps: - task: CMake@1 inputs: @@ -175,7 +161,6 @@ jobs: - script: ctest --verbose --timeout 240 workingDirectory: $(BuildType) displayName: CTest - # - upload: $(Build.SourcesDirectory)/$(BuildType) # artifact: mimalloc-macos-$(BuildType) @@ -183,6 +168,35 @@ jobs: # Other OS 
versions (just debug mode) # ---------------------------------------------------------- +- job: + displayName: Windows 2019 + pool: + vmImage: + windows-2019 + strategy: + matrix: + Debug: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + MSBuildConfiguration: Debug + Release: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + MSBuildConfiguration: Release + steps: + - task: CMake@1 + inputs: + workingDirectory: $(BuildType) + cmakeArgs: .. $(cmakeExtraArgs) + - task: MSBuild@1 + inputs: + solution: $(BuildType)/libmimalloc.sln + configuration: '$(MSBuildConfiguration)' + msbuildArguments: -m + - script: ctest --verbose --timeout 240 -C $(MSBuildConfiguration) + workingDirectory: $(BuildType) + displayName: CTest + - job: displayName: Ubuntu 24.04 pool: diff --git a/bin/mimalloc-redirect-arm64.dll b/bin/mimalloc-redirect-arm64.dll old mode 100755 new mode 100644 index 27172d2c..e6360285 Binary files a/bin/mimalloc-redirect-arm64.dll and b/bin/mimalloc-redirect-arm64.dll differ diff --git a/bin/mimalloc-redirect-arm64.lib b/bin/mimalloc-redirect-arm64.lib old mode 100755 new mode 100644 index dca80b9b..11d71ef9 Binary files a/bin/mimalloc-redirect-arm64.lib and b/bin/mimalloc-redirect-arm64.lib differ diff --git a/bin/mimalloc-redirect-arm64ec.dll b/bin/mimalloc-redirect-arm64ec.dll old mode 100755 new mode 100644 index a228af39..f5ee4e47 Binary files a/bin/mimalloc-redirect-arm64ec.dll and b/bin/mimalloc-redirect-arm64ec.dll differ diff --git a/bin/mimalloc-redirect-arm64ec.lib b/bin/mimalloc-redirect-arm64ec.lib old mode 100755 new mode 100644 index 0ce77436..b88e8fc1 Binary files a/bin/mimalloc-redirect-arm64ec.lib and b/bin/mimalloc-redirect-arm64ec.lib differ diff --git a/bin/mimalloc-redirect.dll b/bin/mimalloc-redirect.dll old mode 100755 new mode 100644 index ec035f1b..9e015cc6 Binary files a/bin/mimalloc-redirect.dll and b/bin/mimalloc-redirect.dll differ diff --git a/bin/mimalloc-redirect.lib b/bin/mimalloc-redirect.lib old mode 100755 new mode 100644 index 785fa475..1d710c01 Binary files a/bin/mimalloc-redirect.lib and b/bin/mimalloc-redirect.lib differ diff --git a/bin/mimalloc-redirect32.dll b/bin/mimalloc-redirect32.dll old mode 100755 new mode 100644 index 92578f24..32799ffe Binary files a/bin/mimalloc-redirect32.dll and b/bin/mimalloc-redirect32.dll differ diff --git a/bin/mimalloc-redirect32.lib b/bin/mimalloc-redirect32.lib old mode 100755 new mode 100644 index bf649787..e2927250 Binary files a/bin/mimalloc-redirect32.lib and b/bin/mimalloc-redirect32.lib differ diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index dfe78468..7f3bd631 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ -set(mi_version_major 2) -set(mi_version_minor 2) -set(mi_version_patch 5) +set(mi_version_major 1) +set(mi_version_minor 9) +set(mi_version_patch 2) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/contrib/docker/alpine-arm32v7/Dockerfile b/contrib/docker/alpine-arm32v7/Dockerfile index daa60f50..f74934fb 100644 --- a/contrib/docker/alpine-arm32v7/Dockerfile +++ b/contrib/docker/alpine-arm32v7/Dockerfile @@ -1,6 +1,6 @@ # install from an image # download first an appropriate tar.gz image into the current directory -# from +# from: FROM scratch # Substitute the image name that was downloaded diff --git a/contrib/docker/alpine-x86/Dockerfile b/contrib/docker/alpine-x86/Dockerfile deleted file mode 
100644 index a0f76c17..00000000 --- a/contrib/docker/alpine-x86/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# install from an image -# download first an appropriate tar.gz image into the current directory -# from -FROM scratch - -# Substitute the image name that was downloaded -ADD alpine-minirootfs-20250108-x86.tar.gz / - -# Install tools -RUN apk add build-base make cmake -RUN apk add git -RUN apk add vim - -RUN mkdir -p /home/dev -WORKDIR /home/dev - -# Get mimalloc -RUN git clone https://github.com/microsoft/mimalloc -b dev2 -RUN mkdir -p mimalloc/out/release -RUN mkdir -p mimalloc/out/debug - -# Build mimalloc debug -WORKDIR /home/dev/mimalloc/out/debug -RUN cmake ../.. -DMI_DEBUG_FULL=ON -# RUN make -j -# RUN make test - -CMD ["/bin/sh"] diff --git a/contrib/vcpkg/portfile.cmake b/contrib/vcpkg/portfile.cmake index abb90af9..69661526 100644 --- a/contrib/vcpkg/portfile.cmake +++ b/contrib/vcpkg/portfile.cmake @@ -4,12 +4,12 @@ vcpkg_from_github( HEAD_REF master # The "REF" can be a commit hash, branch name (dev2), or a version (v2.2.1). - REF "v${VERSION}" - # REF e2db21e9ba9fb9172b7b0aa0fe9b8742525e8774 + # REF "v${VERSION}" + REF 866ce5b89db1dbc3e66bbf89041291fd16329518 # The sha512 is the hash of the tar.gz bundle. - # (To get the sha512, run `vcpkg install "mimalloc[override]" --overlay-ports=./contrib/vcpkg` and copy the sha from the error message.) - SHA512 5218fcd3ad285687ed3f78b4651d7d3aee92b6f28e6c563a884975e654a43c94c4e5c02c5ed0322c3d3627d83d4843df2d2d8441f09aa18d00674ca9fd657345 + # (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=` and copy the sha from the error message.) + SHA512 0b0e5ff823c49b9534b8c32800679806c5d7c29020af058da043c3e6e36ae3c32a1cdd5a21ece97dd60bc7dd4703967f683beac435dbb8514638a6cc55e5dea8 ) vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS @@ -19,7 +19,6 @@ vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS secure MI_SECURE override MI_OVERRIDE optarch MI_OPT_ARCH - nooptarch MI_NO_OPT_ARCH optsimd MI_OPT_SIMD xmalloc MI_XMALLOC asm MI_SEE_ASM diff --git a/contrib/vcpkg/vcpkg.json b/contrib/vcpkg/vcpkg.json index 42f2aa35..45f8097b 100644 --- a/contrib/vcpkg/vcpkg.json +++ b/contrib/vcpkg/vcpkg.json @@ -1,7 +1,7 @@ { "name": "mimalloc", - "version": "2.2.4", - "port-version": 1, + "version": "1.9.2", + "port-version": 2, "description": "Compact general purpose allocator with excellent performance", "homepage": "https://github.com/microsoft/mimalloc", "license": "MIT", @@ -35,9 +35,6 @@ "optarch": { "description": "Use architecture specific optimizations (on x64: '-march=haswell;-mavx2', on arm64: '-march=armv8.1-a')" }, - "nooptarch": { - "description": "Do _not_ use architecture specific optimizations (on x64: '-march=haswell;-mavx2', on arm64: '-march=armv8.1-a')" - }, "optsimd": { "description": "Allow use of SIMD instructions (avx2 or neon) (requires 'optarch' to be enabled)" }, diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index 128a4ff6..d6af71ce 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/include/mimalloc.h b/include/mimalloc.h index d895d925..4e9c3156 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, 
Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 225 // major + 2 digits minor +#define MI_MALLOC_VERSION 192 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes @@ -97,7 +97,6 @@ terms of the MIT license. A copy of the license can be found in the file #include // size_t #include // bool -#include // INTPTR_MAX #ifdef __cplusplus extern "C" { @@ -154,21 +153,17 @@ mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; -mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; mi_decl_export void mi_options_print(void) mi_attr_noexcept; +mi_decl_export void mi_process_init(void) mi_attr_noexcept; +mi_decl_export void mi_thread_init(void) mi_attr_noexcept; +mi_decl_export void mi_thread_done(void) mi_attr_noexcept; +mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; + mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; - -// Generally do not use the following as these are usually called automatically -mi_decl_export void mi_process_init(void) mi_attr_noexcept; -mi_decl_export void mi_cdecl mi_process_done(void) mi_attr_noexcept; -mi_decl_export void mi_thread_init(void) mi_attr_noexcept; -mi_decl_export void mi_thread_done(void) mi_attr_noexcept; - - // ------------------------------------------------------------------------------------- // Aligned allocation // Note that `alignment` always follows `size` for consistency with unaligned diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index e8bac316..6eaa6f99 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -111,7 +111,6 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des) -#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,(tp*)des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,(tp*)x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x) @@ -121,7 +120,6 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) 
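The `mi_atomic_cas_ptr_*` wrappers above exist so the same call site compiles both as C11 `_Atomic` code and as C++ `std::atomic` code (the file provides parallel definitions for each mode). Their typical consumer is a compare-and-swap publish loop; below is a minimal sketch in plain C11 of the pattern the weak/release variants are built for — the `node_t` type and `push` function are illustrative, not part of mimalloc:

```c
#include <stdatomic.h>

typedef struct node_s { struct node_s* next; } node_t;

// lock-free push: the kind of loop mi_atomic_cas_ptr_weak_release wraps.
// A weak CAS may fail spuriously, so it always sits inside a retry loop.
static void push(_Atomic(node_t*)* top, node_t* n) {
  node_t* expect = atomic_load_explicit(top, memory_order_relaxed);
  do {
    n->next = expect;   // link to the current head before publishing
  } while (!atomic_compare_exchange_weak_explicit(
               top, &expect, n,
               memory_order_release,    // on success: publish n->next to other threads
               memory_order_relaxed));  // on failure: expect was reloaded, just retry
}
```

The release ordering on success is what makes `n->next` visible to a thread that later takes `n` off the list.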
-#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) @@ -268,13 +266,6 @@ static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int6 return current; #endif } -static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, const volatile int64_t* padd) { - const int64_t add = *padd; - if (add != 0) { - mi_atomic_addi64_relaxed((volatile _Atomic(int64_t)*)p, add); - } -} - static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { int64_t current; do { @@ -305,7 +296,6 @@ static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) -#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) (tp*)mi_atomic_exchange_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) @@ -373,9 +363,8 @@ static inline void mi_atomic_yield(void) { _mm_pause(); } #elif (defined(__GNUC__) || defined(__clang__)) && \ - (defined(__x86_64__) || defined(__i386__) || \ - defined(__aarch64__) || defined(__arm__) || \ - defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__)) + (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \ + defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__) #if defined(__x86_64__) || defined(__i386__) static inline void mi_atomic_yield(void) { __asm__ volatile ("pause" ::: "memory"); @@ -384,16 +373,10 @@ static inline void mi_atomic_yield(void) { static inline void mi_atomic_yield(void) { __asm__ volatile("wfe"); } -#elif defined(__arm__) -#if __ARM_ARCH >= 7 +#elif (defined(__arm__) && __ARM_ARCH__ >= 7) static inline void mi_atomic_yield(void) { __asm__ volatile("yield" ::: "memory"); } -#else -static inline void mi_atomic_yield(void) { - __asm__ volatile ("nop" ::: "memory"); -} -#endif #elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__) #ifdef __APPLE__ static inline void mi_atomic_yield(void) { @@ -404,6 +387,10 @@ static inline void mi_atomic_yield(void) { __asm__ __volatile__ ("or 27,27,27" ::: "memory"); } #endif +#elif defined(__armel__) || defined(__ARMEL__) +static inline void mi_atomic_yield(void) { + __asm__ volatile ("nop" ::: "memory"); +} #endif #elif defined(__sun) // Fallback for other archs diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index ca5be930..106da0d1 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -8,6 +8,7 @@ terms of the 
MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_INTERNAL_H #define MIMALLOC_INTERNAL_H + // -------------------------------------------------------------------------- // This file contains the internal API's of mimalloc and various utility // functions and macros. @@ -16,88 +17,50 @@ terms of the MIT license. A copy of the license can be found in the file #include "types.h" #include "track.h" - -// -------------------------------------------------------------------------- -// Compiler defines -// -------------------------------------------------------------------------- - #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) #else #define mi_trace_message(...) #endif -#define mi_decl_cache_align mi_decl_align(64) - +#define MI_CACHE_LINE 64 #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) #pragma warning(disable:26812) // unscoped enum warning #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) -#define mi_decl_align(a) __declspec(align(a)) -#define mi_decl_noreturn __declspec(noreturn) +#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) #define mi_decl_weak #define mi_decl_hidden -#define mi_decl_cold #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread -#define mi_decl_align(a) __attribute__((aligned(a))) -#define mi_decl_noreturn __attribute__((noreturn)) +#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) #define mi_decl_weak __attribute__((weak)) #define mi_decl_hidden __attribute__((visibility("hidden"))) -#if (__GNUC__ >= 4) || defined(__clang__) -#define mi_decl_cold __attribute__((cold)) -#else -#define mi_decl_cold -#endif #elif __cplusplus >= 201103L // c++11 #define mi_decl_noinline #define mi_decl_thread thread_local -#define mi_decl_align(a) alignas(a) -#define mi_decl_noreturn [[noreturn]] +#define mi_decl_cache_align alignas(MI_CACHE_LINE) #define mi_decl_weak #define mi_decl_hidden -#define mi_decl_cold #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) -#define mi_decl_align(a) -#define mi_decl_noreturn +#define mi_decl_cache_align #define mi_decl_weak #define mi_decl_hidden -#define mi_decl_cold -#endif - -#if defined(__GNUC__) || defined(__clang__) -#define mi_unlikely(x) (__builtin_expect(!!(x),false)) -#define mi_likely(x) (__builtin_expect(!!(x),true)) -#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) -#define mi_unlikely(x) (x) [[unlikely]] -#define mi_likely(x) (x) [[likely]] -#else -#define mi_unlikely(x) (x) -#define mi_likely(x) (x) -#endif - -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif - -#if defined(__cplusplus) -#define mi_decl_externc extern "C" -#else -#define mi_decl_externc #endif #if defined(__EMSCRIPTEN__) && !defined(__wasi__) #define __wasi__ #endif - -// -------------------------------------------------------------------------- -// Internal functions -// -------------------------------------------------------------------------- +#if defined(__cplusplus) +#define mi_decl_externc extern "C" +#else +#define mi_decl_externc +#endif // "libc.c" #include @@ -133,10 +96,10 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c -extern mi_decl_hidden mi_decl_cache_align mi_stats_t 
_mi_stats_main; +extern mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_hidden mi_decl_cache_align const mi_page_t _mi_page_empty; -void _mi_auto_process_init(void); -void mi_cdecl _mi_auto_process_done(void) mi_attr_noexcept; +void _mi_process_load(void); +void mi_cdecl _mi_process_done(void); bool _mi_is_redirected(void); bool _mi_allocator_init(const char** message); void _mi_allocator_done(void); @@ -154,7 +117,6 @@ void _mi_heap_guarded_init(mi_heap_t* heap); // os.c void _mi_os_init(void); // called from process init void* _mi_os_alloc(size_t size, mi_memid_t* memid); -void* _mi_os_zalloc(size_t size, mi_memid_t* memid); void _mi_os_free(void* p, size_t size, mi_memid_t memid); void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid); @@ -164,14 +126,12 @@ bool _mi_os_has_overcommit(void); bool _mi_os_has_virtual_reserve(void); bool _mi_os_reset(void* addr, size_t size); +bool _mi_os_commit(void* p, size_t size, bool* is_zero); bool _mi_os_decommit(void* addr, size_t size); +bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size); -void _mi_os_reuse(void* p, size_t size); -mi_decl_nodiscard bool _mi_os_commit(void* p, size_t size, bool* is_zero); -mi_decl_nodiscard bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); -bool _mi_os_protect(void* addr, size_t size); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); @@ -179,10 +139,8 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); bool _mi_os_use_large_page(size_t size, size_t alignment); size_t _mi_os_large_page_size(void); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); -int _mi_os_numa_node_count(void); -int _mi_os_numa_node(void); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); // arena.c mi_arena_id_t _mi_arena_id_none(void); @@ -219,11 +177,10 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment); void _mi_segment_map_unsafe_destroy(void); // "segment.c" -mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld); -void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); -void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); -bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); -void _mi_segment_collect(mi_segment_t* segment, bool force); +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld); +void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); +void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); #if MI_HUGE_PAGE_ABANDON void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); @@ -231,11 +188,10 @@ void 
_mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, m void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); #endif -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page -void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); -void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld); -bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment); -bool _mi_segment_visit_blocks(mi_segment_t* segment, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg); +void _mi_segments_collect(bool force, mi_segments_tld_t* tld); +void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); +bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment); +bool _mi_segment_visit_blocks(mi_segment_t* segment, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg); // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; @@ -258,7 +214,6 @@ void _mi_deferred_free(mi_heap_t* heap, bool force); void _mi_page_free_collect(mi_page_t* page,bool force); void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments -size_t _mi_page_bin(const mi_page_t* page); // for stats size_t _mi_bin_size(size_t bin); // for stats size_t _mi_bin(size_t size); // for stats @@ -275,7 +230,6 @@ bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* pa // "stats.c" void _mi_stats_done(mi_stats_t* stats); -void _mi_stats_merge_thread(mi_tld_t* tld); mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); @@ -297,6 +251,26 @@ bool _mi_page_is_valid(mi_page_t* page); #endif +// ------------------------------------------------------ +// Branches +// ------------------------------------------------------ + +#if defined(__GNUC__) || defined(__clang__) +#define mi_unlikely(x) (__builtin_expect(!!(x),false)) +#define mi_likely(x) (__builtin_expect(!!(x),true)) +#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) +#define mi_unlikely(x) (x) [[unlikely]] +#define mi_likely(x) (x) [[likely]] +#else +#define mi_unlikely(x) (x) +#define mi_likely(x) (x) +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + + /* ----------------------------------------------------------- Error codes passed to `_mi_fatal_error` All are recoverable but EFAULT is a serious error and aborts by default in secure mode. @@ -321,32 +295,6 @@ bool _mi_page_is_valid(mi_page_t* page); #endif -// ------------------------------------------------------ -// Assertions -// ------------------------------------------------------ - -#if (MI_DEBUG) -// use our own assertion to print without memory allocation -mi_decl_noreturn mi_decl_cold void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func) mi_attr_noexcept; -#define mi_assert(expr) ((expr) ? 
(void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) -#else -#define mi_assert(x) -#endif - -#if (MI_DEBUG>1) -#define mi_assert_internal mi_assert -#else -#define mi_assert_internal(x) -#endif - -#if (MI_DEBUG>2) -#define mi_assert_expensive mi_assert -#else -#define mi_assert_expensive(x) -#endif - - - /* ----------------------------------------------------------- Inlined definitions ----------------------------------------------------------- */ @@ -393,28 +341,12 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { } } -// Align downwards -static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { - mi_assert_internal(alignment != 0); - uintptr_t mask = alignment - 1; - if ((alignment & mask) == 0) { // power of two? - return (sz & ~mask); - } - else { - return ((sz / alignment) * alignment); - } -} // Align a pointer upwards static inline void* mi_align_up_ptr(void* p, size_t alignment) { return (void*)_mi_align_up((uintptr_t)p, alignment); } -// Align a pointer downwards -static inline void* mi_align_down_ptr(void* p, size_t alignment) { - return (void*)_mi_align_down((uintptr_t)p, alignment); -} - // Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { @@ -438,7 +370,6 @@ static inline bool mi_mem_is_zero(const void* p, size_t size) { return true; } - // Align a byte size to a size in _machine words_, // i.e. byte size == `wsize*sizeof(void*)`. static inline size_t _mi_wsize_from_size(size_t size) { @@ -533,44 +464,29 @@ static inline mi_segment_t* _mi_ptr_segment(const void* p) { #endif } -static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) { - mi_assert_internal(s->slice_offset== 0 && s->slice_count > 0); - return (mi_page_t*)(s); -} - -static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) { - mi_assert_internal(p->slice_offset== 0 && p->slice_count > 0); - return (mi_slice_t*)(p); -} - // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { mi_assert_internal(page!=NULL); mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries)); + mi_assert_internal(segment == NULL || page == &segment->pages[page->segment_idx]); return segment; } -static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { - mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset); - mi_assert_internal(start >= _mi_ptr_segment(slice)->slices); - mi_assert_internal(start->slice_offset == 0); - mi_assert_internal(start + start->slice_count > slice); - return start; +// used internally +static inline size_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) { + // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages + ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; + mi_assert_internal(diff >= 0 && (size_t)diff <= MI_SEGMENT_SIZE /* for huge alignment it can be equal */); + size_t idx = (size_t)diff >> segment->page_shift; + mi_assert_internal(idx < segment->capacity); + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); + return idx; } -// Get the page containing the pointer (performance critical as it is called in mi_free) +// Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { - mi_assert_internal(p > (void*)segment); - ptrdiff_t diff = (uint8_t*)p - 
(uint8_t*)segment; - mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE); - size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT; - mi_assert_internal(idx <= segment->slice_entries); - mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; - mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data - mi_assert_internal(slice->slice_offset == 0); - mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries); - return mi_slice_to_page(slice); + size_t idx = _mi_segment_page_idx_of(segment, p); + return &((mi_segment_t*)segment)->pages[idx]; } // Quick page start for initialized pages @@ -593,8 +509,8 @@ static inline size_t mi_page_block_size(const mi_page_t* page) { } static inline bool mi_page_is_huge(const mi_page_t* page) { - mi_assert_internal((page->is_huge && _mi_page_segment(page)->kind == MI_SEGMENT_HUGE) || - (!page->is_huge && _mi_page_segment(page)->kind != MI_SEGMENT_HUGE)); + mi_assert_internal((page->is_huge && _mi_page_segment(page)->page_kind == MI_PAGE_HUGE) || + (!page->is_huge && _mi_page_segment(page)->page_kind != MI_PAGE_HUGE)); return page->is_huge; } @@ -606,11 +522,7 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) { // size of a segment static inline size_t mi_segment_size(mi_segment_t* segment) { - return segment->segment_slices * MI_SEGMENT_SLICE_SIZE; -} - -static inline uint8_t* mi_segment_end(mi_segment_t* segment) { - return (uint8_t*)segment + mi_segment_size(segment); + return segment->segment_size; } // Thread free access @@ -765,13 +677,12 @@ static inline bool mi_is_in_same_segment(const void* p, const void* q) { } static inline bool mi_is_in_same_page(const void* p, const void* q) { - mi_segment_t* segment = _mi_ptr_segment(p); - if (_mi_ptr_segment(q) != segment) return false; - // assume q may be invalid // return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q)); - mi_page_t* page = _mi_segment_page_of(segment, p); - size_t psize; - uint8_t* start = _mi_segment_page_start(segment, page, &psize); - return (start <= (uint8_t*)q && (uint8_t*)q < start + psize); + mi_segment_t* segmentp = _mi_ptr_segment(p); + mi_segment_t* segmentq = _mi_ptr_segment(q); + if (segmentp != segmentq) return false; + size_t idxp = _mi_segment_page_idx_of(segmentp, p); + size_t idxq = _mi_segment_page_idx_of(segmentq, q); + return (idxp == idxq); } static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) { @@ -853,50 +764,6 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c } -// ------------------------------------------------------------------- -// commit mask -// ------------------------------------------------------------------- - -static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - cm->mask[i] = 0; - } -} - -static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - cm->mask[i] = ~((size_t)0); - } -} - -static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - if (cm->mask[i] != 0) return false; - } - return true; -} - -static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) { - for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { - if (cm->mask[i] != ~((size_t)0)) return false; - } - return true; -} - -// defined in `segment.c`: -size_t 
_mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); -size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx); - -#define mi_commit_mask_foreach(cm,idx,count) \ - idx = 0; \ - while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) { - -#define mi_commit_mask_foreach_end() \ - idx += count; \ - } - - - /* ----------------------------------------------------------- memory id's ----------------------------------------------------------- */ @@ -912,10 +779,8 @@ static inline mi_memid_t _mi_memid_none(void) { return _mi_memid_create(MI_MEM_NONE); } -static inline mi_memid_t _mi_memid_create_os(void* base, size_t size, bool committed, bool is_zero, bool is_large) { +static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { mi_memid_t memid = _mi_memid_create(MI_MEM_OS); - memid.mem.os.base = base; - memid.mem.os.size = size; memid.initially_committed = committed; memid.initially_zero = is_zero; memid.is_pinned = is_large; @@ -947,6 +812,24 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x) { return x; } +// ------------------------------------------------------------------- +// Optimize numa node access for the common case (= one node) +// ------------------------------------------------------------------- + +int _mi_os_numa_node_get(void); +size_t _mi_os_numa_node_count_get(void); + +extern mi_decl_hidden _Atomic(size_t) _mi_numa_node_count; +static inline int _mi_os_numa_node(void) { + if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; } + else return _mi_os_numa_node_get(); +} +static inline size_t _mi_os_numa_node_count(void) { + const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); + if mi_likely(count > 0) { return count; } + else return _mi_os_numa_node_count_get(); +} + // ----------------------------------------------------------------------- @@ -987,7 +870,7 @@ static inline size_t mi_clz(size_t x) { #else _BitScanReverse64(&idx, x); #endif - return ((MI_SIZE_BITS - 1) - (size_t)idx); + return ((MI_SIZE_BITS - 1) - idx); } static inline size_t mi_ctz(size_t x) { if (x==0) return MI_SIZE_BITS; @@ -997,7 +880,7 @@ static inline size_t mi_ctz(size_t x) { #else _BitScanForward64(&idx, x); #endif - return (size_t)idx; + return idx; } #else @@ -1064,21 +947,6 @@ static inline size_t mi_bsr(size_t x) { return (x==0 ? MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x)); } -size_t _mi_popcount_generic(size_t x); - -static inline size_t mi_popcount(size_t x) { - if (x<=1) return x; - if (x==SIZE_MAX) return MI_SIZE_BITS; - #if defined(__GNUC__) - #if (SIZE_MAX == ULONG_MAX) - return __builtin_popcountl(x); - #else - return __builtin_popcountll(x); - #endif - #else - return _mi_popcount_generic(x); - #endif -} // --------------------------------------------------------------------------------- // Provide our own `_mi_memcpy` for potential performance optimizations. 
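A few hunks up, the restored `_mi_os_numa_node`/`_mi_os_numa_node_count` inlines cache the node count in a process-global atomic so the common single-node case costs one relaxed load instead of an OS query. The same read-mostly caching pattern in isolation — `query_numa_node_count` stands in for the platform-specific call and is not a real mimalloc function:

```c
#include <stdatomic.h>
#include <stddef.h>

// hypothetical platform query, e.g. scanning /sys/devices/system/node on Linux
extern size_t query_numa_node_count(void);

static _Atomic(size_t) numa_node_count;   // 0 = not yet initialized

static size_t os_numa_node_count(void) {
  size_t count = atomic_load_explicit(&numa_node_count, memory_order_relaxed);
  if (count == 0) {                        // first caller performs the (slow) OS query
    count = query_numa_node_count();
    if (count == 0) count = 1;             // never cache 0: it means "uninitialized"
    atomic_store_explicit(&numa_node_count, count, memory_order_relaxed);
  }
  return count;                            // later callers take the single relaxed load
}
```

Racing initializers may both perform the query, but they store the same value, so relaxed ordering suffices and no lock is needed.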
@@ -1090,8 +958,8 @@ static inline size_t mi_popcount(size_t x) { #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include -extern mi_decl_hidden bool _mi_cpu_has_fsrm; -extern mi_decl_hidden bool _mi_cpu_has_erms; +extern bool _mi_cpu_has_fsrm; +extern bool _mi_cpu_has_erms; static inline void _mi_memcpy(void* dst, const void* src, size_t n) { if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) { __movsb((unsigned char*)dst, (const unsigned char*)src, n); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 1087d9b8..bddd66e9 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -59,15 +59,10 @@ int _mi_prim_commit(void* addr, size_t size, bool* is_zero); // pre: needs_recommit != NULL int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); -// Reset memory. The range keeps being accessible but the content might be reset to zero at any moment. +// Reset memory. The range keeps being accessible but the content might be reset. // Returns error code or 0 on success. int _mi_prim_reset(void* addr, size_t size); -// Reuse memory. This is called for memory that is already committed but -// may have been reset (`_mi_prim_reset`) or decommitted (`_mi_prim_decommit`) where `needs_recommit` was false. -// Returns error code or 0 on success. On most platforms this is a no-op. -int _mi_prim_reuse(void* addr, size_t size); - // Protect memory. Returns error code or 0 on success. int _mi_prim_protect(void* addr, size_t size, bool protect); @@ -123,6 +118,9 @@ void _mi_prim_thread_done_auto_done(void); void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); + + + //------------------------------------------------------------------- // Access to TLS (thread local storage) slots. // We need fast access to both a unique thread id (in `free.c:mi_free`) and @@ -210,19 +208,19 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS) // On windows we can store the thread-local heap at a fixed TLS slot to avoid -// thread-local initialization checks in the fast path. -// We allocate a user TLS slot at process initialization (see `windows/prim.c`) -// and store the offset `_mi_win_tls_offset`. -#define MI_HAS_TLS_SLOT 1 // 2 = we can reliably initialize the slot (saving a test on each malloc) +// thread-local initialization checks in the fast path. This uses a fixed location +// in the TCB though (last user-reserved slot by default) which may clash with other applications. 
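With `MI_WIN_USE_FIXED_TLS`, the hunk below pins the heap pointer at a fixed byte offset in the TEB (`0x710` on 32-bit, `0x888` on 64-bit — the last user-reserved slots) instead of an offset allocated at process start. On MSVC such a slot is reached directly through the segment register; a sketch of that access pattern, assuming x64 (32-bit would use `__readfsdword`/`__writefsdword`):

```c
#include <intrin.h>
#include <stddef.h>

// read/write a TEB slot at a fixed byte offset from the gs base (x64 MSVC).
// 0x888 is the last user-reserved slot, as chosen in the hunk below --
// nothing reserves it, so other code in the process could claim it too.
static inline void* teb_slot_get(size_t ofs) {
  return (void*)__readgsqword((unsigned long)ofs);
}
static inline void teb_slot_set(size_t ofs, void* value) {
  __writegsqword((unsigned long)ofs, (unsigned __int64)value);
}
```

The trade-off named in the comment above is real: if another component picks the same offset, the cached heap pointer is silently corrupted, which is why the dynamically allocated offset was the later default.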
-extern mi_decl_hidden size_t _mi_win_tls_offset; +#define MI_HAS_TLS_SLOT 2 // 2 = we can reliably initialize the slot (saving a test on each malloc) #if MI_WIN_USE_FIXED_TLS > 1 #define MI_TLS_SLOT (MI_WIN_USE_FIXED_TLS) #elif MI_SIZE_SIZE == 4 -#define MI_TLS_SLOT (0x0E10 + _mi_win_tls_offset) // User TLS slots +#define MI_TLS_SLOT (0x710) // Last user-reserved slot +// #define MI_TLS_SLOT (0xF0C) // Last TlsSlot (might clash with other app reserved slot) #else -#define MI_TLS_SLOT (0x1480 + _mi_win_tls_offset) // User TLS slots +#define MI_TLS_SLOT (0x888) // Last user-reserved slot +// #define MI_TLS_SLOT (0x1678) // Last TlsSlot (might clash with other app reserved slot) #endif static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { @@ -271,8 +269,8 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // defined in `init.c`; do not use these directly -extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from -extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? +extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern bool _mi_process_is_initialized; // has mi_process_init been called? static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; @@ -400,7 +398,7 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) { #elif defined(MI_TLS_PTHREAD) -extern mi_decl_hidden pthread_key_t _mi_heap_default_key; +extern pthread_key_t _mi_heap_default_key; static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index a15d9cba..9f743149 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -13,9 +13,8 @@ terms of the MIT license. A copy of the license can be found in the file // mi_heap_t : all data for a thread-local heap, contains // lists of all managed heap pages. // mi_segment_t : a larger chunk of memory (32GiB) from where pages -// are allocated. A segment is divided in slices (64KiB) from -// which pages are allocated. -// mi_page_t : a "mimalloc" page (usually 64KiB or 512KiB) from +// are allocated. +// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from // where objects are allocated. // Note: we write "OS page" for OS memory pages while // using plain "page" for mimalloc pages (`mi_page_t`). @@ -67,10 +66,10 @@ terms of the MIT license. 
A copy of the license can be found in the file // #define MI_DEBUG 2 // + internal assertion checks // #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON) #if !defined(MI_DEBUG) -#if defined(MI_BUILD_RELEASE) || defined(NDEBUG) -#define MI_DEBUG 0 -#else +#if !defined(NDEBUG) || defined(_DEBUG) #define MI_DEBUG 2 +#else +#define MI_DEBUG 0 #endif #endif @@ -168,40 +167,38 @@ typedef int32_t mi_ssize_t; // ------------------------------------------------------ // Main tuning parameters for segment and page sizes -// Sizes for 64-bit (usually divide by two for 32-bit) -#ifndef MI_SEGMENT_SLICE_SHIFT -#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) -#endif - -#ifndef MI_SEGMENT_SHIFT -#if MI_INTPTR_SIZE > 4 -#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB -#else -#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit -#endif -#endif - +// Sizes for 64-bit, divide by two for 32-bit #ifndef MI_SMALL_PAGE_SHIFT -#define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB +#define MI_SMALL_PAGE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB #endif #ifndef MI_MEDIUM_PAGE_SHIFT -#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB +#endif +#ifndef MI_LARGE_PAGE_SHIFT +#define MI_LARGE_PAGE_SHIFT ( 3 + MI_MEDIUM_PAGE_SHIFT) // 4MiB +#endif +#ifndef MI_SEGMENT_SHIFT +#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4MiB -- must be equal to `MI_LARGE_PAGE_SHIFT` #endif // Derived constants #define MI_SEGMENT_SIZE (MI_ZU(1)<= 655360) +#if (MI_LARGE_OBJ_WSIZE_MAX >= 655360) #error "mimalloc internal: define more bins" #endif // Maximum block size for which blocks are guaranteed to be block size aligned. (see `segment.c:_mi_segment_page_start`) -#define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX) +#define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX) // Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments -#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) +#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) -// Maximum slice count (255) for which we can find the page for interior pointers -#define MI_MAX_SLICE_OFFSET_COUNT ((MI_BLOCK_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) - -// we never allocate more than PTRDIFF_MAX (see also ) -// on 64-bit+ systems we also limit the maximum allocation size such that the slice count fits in 32-bits. 
(issue #877) -#if (PTRDIFF_MAX > INT32_MAX) && (PTRDIFF_MAX >= (MI_SEGMENT_SLIZE_SIZE * UINT32_MAX)) -#define MI_MAX_ALLOC_SIZE (MI_SEGMENT_SLICE_SIZE * (UINT32_MAX-1)) -#else +// We never allocate more than PTRDIFF_MAX (see also ) #define MI_MAX_ALLOC_SIZE PTRDIFF_MAX -#endif - // ------------------------------------------------------ // Mimalloc pages contain allocated blocks @@ -308,8 +296,8 @@ typedef uintptr_t mi_thread_free_t; // Notes: // - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc` // - Using `uint16_t` does not seem to slow things down -// - The size is 12 words on 64-bit which helps the page index calculations -// (and 14 words on 32-bit, and encoded free lists add 2 words) +// - The size is 10 words on 64-bit which helps the page index calculations +// (and 12 words on 32-bit, and encoded free lists add 2 words) // - `xthread_free` uses the bottom bits as a delayed-free flags to optimize // concurrent frees where only the first concurrent free adds to the owning // heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`). @@ -319,12 +307,12 @@ typedef uintptr_t mi_thread_free_t; // will be freed correctly even if only other threads free blocks. typedef struct mi_page_s { // "owned" by the segment - uint32_t slice_count; // slices in this page (0 if not a page) - uint32_t slice_offset; // distance from the actual page data slice (0 if a page) + uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` + uint8_t segment_in_use:1; // `true` if the segment allocated this page uint8_t is_committed:1; // `true` if the page virtual memory is committed uint8_t is_zero_init:1; // `true` if the page was initially zero initialized - uint8_t is_huge:1; // `true` if the page is in a huge segment (`segment->kind == MI_SEGMENT_HUGE`) - // padding + uint8_t is_huge:1; // `true` if the page is in a huge segment + // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory @@ -348,11 +336,12 @@ typedef struct mi_page_s { _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads _Atomic(uintptr_t) xheap; - struct mi_page_s* next; // next page owned by this thread with the same `block_size` - struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` + struct mi_page_s* next; // next page owned by the heap with the same `block_size` + struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` - // 64-bit 11 words, 32-bit 13 words, (+2 for secure) + #if MI_INTPTR_SIZE==4 // pad to 12 words on 32-bit void* padding[1]; + #endif } mi_page_t; @@ -365,44 +354,10 @@ typedef enum mi_page_kind_e { MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages inside a segment MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment - MI_PAGE_HUGE // a huge page is a single page in a segment of variable size - // used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an aligment `> MI_BLOCK_ALIGNMENT_MAX`. + MI_PAGE_HUGE // a huge page is a single page in a segment of variable size (but still 2MiB aligned) + // used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an alignment `> MI_BLOCK_ALIGNMENT_MAX`. 
} mi_page_kind_t; -typedef enum mi_segment_kind_e { - MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside. - MI_SEGMENT_HUGE, // segment with just one huge page inside. -} mi_segment_kind_t; - -// ------------------------------------------------------ -// A segment holds a commit mask where a bit is set if -// the corresponding MI_COMMIT_SIZE area is committed. -// The MI_COMMIT_SIZE must be a multiple of the slice -// size. If it is equal we have the most fine grained -// decommit (but setting it higher can be more efficient). -// The MI_MINIMAL_COMMIT_SIZE is the minimal amount that will -// be committed in one go which can be set higher than -// MI_COMMIT_SIZE for efficiency (while the decommit mask -// is still tracked in fine-grained MI_COMMIT_SIZE chunks) -// ------------------------------------------------------ - -#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) -#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB -#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) -#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS -#define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) - -#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_FIELD_COUNT * MI_COMMIT_MASK_FIELD_BITS)) -#error "the segment size must be exactly divisible by the (commit size * size_t bits)" -#endif - -typedef struct mi_commit_mask_s { - size_t mask[MI_COMMIT_MASK_FIELD_COUNT]; -} mi_commit_mask_t; - -typedef mi_page_t mi_slice_t; -typedef int64_t mi_msecs_t; - // --------------------------------------------------------------- // a memory id tracks the provenance of arena/OS allocated memory @@ -446,57 +401,43 @@ typedef struct mi_memid_s { } mi_memid_t; -// ----------------------------------------------------------------------------------------- -// Segments are large allocated memory blocks (32mb on 64 bit) from arenas or the OS. -// -// Inside segments we allocated fixed size mimalloc pages (`mi_page_t`) that contain blocks. -// The start of a segment is this structure with a fixed number of slice entries (`slices`) -// usually followed by a guard OS page and the actual allocation area with pages. -// While a page is not allocated, we view it's data as a `mi_slice_t` (instead of a `mi_page_t`). -// Of any free area, the first slice has the info and `slice_offset == 0`; for any subsequent -// slices part of the area, the `slice_offset` is the byte offset back to the first slice -// (so we can quickly find the page info on a free, `internal.h:_mi_segment_page_of`). -// For slices, the `block_size` field is repurposed to signify if a slice is used (`1`) or not (`0`). -// Small and medium pages use a fixed amount of slices to reduce slice fragmentation, while -// large and huge pages span a variable amount of slices. - +// --------------------------------------------------------------- +// Segments contain mimalloc pages +// --------------------------------------------------------------- typedef struct mi_subproc_s mi_subproc_t; +// Segments are large allocated memory blocks (2MiB on 64 bit) from the OS. +// Inside segments we allocated fixed size _pages_ that contain blocks. 
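In this layout a pointer reaches its metadata with pure address arithmetic: align down to the segment (segments are allocated at `MI_SEGMENT_SIZE` alignment) and shift the in-segment offset by the segment's `page_shift` to index `pages[]`, as `_mi_segment_page_idx_of` above does. A worked sketch with the 64-bit constants from this diff (22-bit segment shift = 4MiB, 16-bit small-page shift = 64KiB; the segment base address is made up):

```c
#include <stdint.h>
#include <stddef.h>
#include <assert.h>

#define SEGMENT_SHIFT     22u   // 4MiB segments (MI_SEGMENT_SHIFT on 64-bit)
#define SEGMENT_MASK      ((uintptr_t)((1u << SEGMENT_SHIFT) - 1))
#define SMALL_PAGE_SHIFT  16u   // 64KiB small pages (MI_SMALL_PAGE_SHIFT on 64-bit)

// align the pointer down to its segment start (as in _mi_ptr_segment)
static uintptr_t ptr_segment(uintptr_t p) { return p & ~SEGMENT_MASK; }

// index into segment->pages[], as in _mi_segment_page_idx_of above
static size_t ptr_page_idx(uintptr_t p, unsigned page_shift) {
  return (size_t)((p - ptr_segment(p)) >> page_shift);
}

int main(void) {
  uintptr_t segment = 0x7f0000400000;     // hypothetical 4MiB-aligned segment base
  uintptr_t p = segment + 3*65536 + 40;   // a block inside the 4th small page
  assert(ptr_segment(p) == segment);
  assert(ptr_page_idx(p, SMALL_PAGE_SHIFT) == 3);
  return 0;
}
```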
typedef struct mi_segment_s { // constant fields - mi_memid_t memid; // memory id for arena/OS allocation - bool allow_decommit; // can we decommmit the memory - bool allow_purge; // can we purge the memory (reset or decommit) - size_t segment_size; - mi_subproc_t* subproc; // segment belongs to sub process + mi_memid_t memid; // memory id to track provenance + bool allow_decommit; + bool allow_purge; + size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` + mi_subproc_t* subproc; // segment belongs to sub process // segment fields - mi_msecs_t purge_expire; // purge slices in the `purge_mask` after this time - mi_commit_mask_t purge_mask; // slices that can be purged - mi_commit_mask_t commit_mask; // slices that are currently committed + struct mi_segment_s* next; // must be the first (non-constant) segment field -- see `segment.c:segment_init` + struct mi_segment_s* prev; + bool was_reclaimed; // true if it was reclaimed (used to limit reclaim-on-free reclamation) + bool dont_free; // can be temporarily true to ensure the segment is not freed - // from here is zero initialized - struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`) - bool was_reclaimed; // true if it was reclaimed (used to limit on-free reclamation) - bool dont_free; // can be temporarily true to ensure the segment is not freed + size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) + size_t abandoned_visits; // count how often this segment is visited for reclaiming (to force reclaim if it is too long) - size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t abandoned_visits; // count how often this segment is visited during abondoned reclamation (to force reclaim if it takes too long) - size_t used; // count of pages in use - uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` + size_t used; // count of pages in use (`used <= capacity`) + size_t capacity; // count of available pages (`#free + used`) + size_t segment_info_size;// space we are using from the first page for segment meta-data and possible guard pages. + uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` struct mi_segment_s* abandoned_os_next; // only used for abandoned segments outside arena's, and only if `mi_option_visit_abandoned` is enabled struct mi_segment_s* abandoned_os_prev; - size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT` - size_t segment_info_slices; // initial count of slices that we are using for segment info and possible guard pages. - // layout like this to optimize access in `mi_free` - mi_segment_kind_t kind; - size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment - - mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one extra final entry for huge blocks with large alignment + size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). 
+ mi_page_kind_t page_kind; // kind of pages: small, medium, large, or huge
+ mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages
 } mi_segment_t;
 
@@ -571,6 +512,7 @@ struct mi_heap_s {
   size_t guarded_size_min; // minimal size for guarded objects
   size_t guarded_size_max; // maximal size for guarded objects
   size_t guarded_sample_rate; // sample rate (set to 0 to disable guarded pages)
+  size_t guarded_sample_seed; // starting sample count
   size_t guarded_sample_count; // current sample count (counting down to 0)
 #endif
   mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
@@ -599,19 +541,20 @@ struct mi_subproc_s {
 
 // ------------------------------------------------------
 // Thread Local data
 // ------------------------------------------------------
 
-// A "span" is an available range of slices. The span queues keep
-// track of slice spans of at most the given `slice_count` (but more than the previous size class).
-typedef struct mi_span_queue_s {
-  mi_slice_t* first;
-  mi_slice_t* last;
-  size_t slice_count;
-} mi_span_queue_t;
+// Milliseconds, stored as `int64_t` to avoid overflow
+typedef int64_t mi_msecs_t;
 
-#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT)
+// Queue of segments
+typedef struct mi_segment_queue_s {
+  mi_segment_t* first;
+  mi_segment_t* last;
+} mi_segment_queue_t;
 
 // Segments thread local data
 typedef struct mi_segments_tld_s {
-  mi_span_queue_t spans[MI_SEGMENT_BIN_MAX+1]; // free slice spans inside segments
+  mi_segment_queue_t small_free; // queue of segments with free small pages
+  mi_segment_queue_t medium_free; // queue of segments with free medium pages
+  mi_page_queue_t pages_purge; // queue of freed pages that are delay-purged
   size_t count; // current number of segments
   size_t peak_count; // peak number of segments
   size_t current_size; // current size of all segments
@@ -632,6 +575,7 @@ struct mi_tld_s {
 };
 
+
 // ------------------------------------------------------
 // Debug
 // ------------------------------------------------------
@@ -646,6 +590,26 @@ struct mi_tld_s {
 #define MI_DEBUG_PADDING (0xDE)
 #endif
 
+#if (MI_DEBUG)
+// use our own assertion to print without memory allocation
+void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func );
+#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__))
+#else
+#define mi_assert(x)
+#endif
+
+#if (MI_DEBUG>1)
+#define mi_assert_internal mi_assert
+#else
+#define mi_assert_internal(x)
+#endif
+
+#if (MI_DEBUG>2)
+#define mi_assert_expensive mi_assert
+#else
+#define mi_assert_expensive(x)
+#endif
+
 
 // ------------------------------------------------------
 // Statistics
@@ -661,25 +625,22 @@ struct mi_tld_s {
 // add to stat keeping track of the peak
 void _mi_stat_increase(mi_stat_count_t* stat, size_t amount);
 void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount);
-void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount);
 // counters can just be increased
 void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
 
 #if (MI_STAT)
 #define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount)
 #define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount)
-#define mi_stat_adjust_decrease(stat,amount) _mi_stat_adjust_decrease( &(stat), amount)
 #define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount)
 #else
 #define mi_stat_increase(stat,amount) ((void)0)
 #define mi_stat_decrease(stat,amount) ((void)0)
-#define mi_stat_adjust_decrease(stat,amount) ((void)0)
 #define mi_stat_counter_increase(stat,amount) ((void)0)
 #endif
 
 #define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount)
 #define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount)
 #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
-#define mi_heap_stat_adjust_decrease(heap,stat,amount) mi_stat_adjust_decrease( (heap)->tld->stats.stat, amount)
+
 #endif
diff --git a/readme.md b/readme.md
index ddf358b2..81f2057e 100644
--- a/readme.md
+++ b/readme.md
@@ -12,9 +12,9 @@ is a general purpose allocator with excellent [performance](#performance) charac
 Initially developed by Daan Leijen for the runtime systems of the
 [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
 
-Latest release : `v3.1.5` (beta) (2025-06-13).
-Latest v2 release: `v2.2.4` (2025-06-09).
-Latest v1 release: `v1.9.4` (2024-06-09).
+Latest release : `v3.0.2` (beta) (2025-03-06).
+Latest v2 release: `v2.2.2` (2025-03-06).
+Latest v1 release: `v1.9.2` (2024-03-06).
 
 mimalloc is a drop-in replacement for `malloc` and can be used in other programs
 without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
@@ -72,25 +72,18 @@ Enjoy!
 
 ### Branches
 
-* `main`: latest stable release (still based on `dev2`).
+* `master`: latest stable release (still based on `dev2`).
 * `dev`: development branch for mimalloc v1. Use this branch for submitting PR's.
 * `dev2`: development branch for mimalloc v2. This branch is downstream of `dev`
   (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage
   mimalloc pages that can reduce fragmentation.
-* `dev3`: development branch for mimalloc v3 beta. This branch is downstream of `dev`. This version
-  simplifies the lock-free ownership of previous versions, and improves sharing of memory between
-  threads. On certain large workloads this version may use (much) less memory.
+* `dev3`: development branch for mimalloc v3-beta. This branch is downstream of `dev`. This version
+  simplifies the lock-free ownership of previous versions and no longer uses thread-local segments.
+  This improves sharing of memory between threads, and on certain large workloads may use less memory
+  with less fragmentation.
 
 ### Releases
 
-* 2025-06-13, `v3.1.5`: Bug fix release where memory was not always correctly committed (issue #1098).
-* 2025-06-09, `v1.9.4`, `v2.2.4`, `v3.1.4` (beta) : Some important bug fixes, including a case where OS memory
-  was not always fully released. Improved v3 performance, build on XBox, fix build on Android, support interpose
-  for older macOS versions, use MADV_FREE_REUSABLE on macOS, always check commit success, better support for Windows
-  fixed TLS offset, etc.
-* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta) : Various small bug and build fixes, including:
-  fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts,
-  fix execution on non BMI1 x64 systems.
 * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes.
   Add `mi_options_print`, `mi_arenas_print`, and the experimental `mi_stat_get` and `mi_stat_get_json`.
   Add `mi_thread_set_in_threadpool` and `mi_heap_set_numa_affinity` (v3 only). Add vcpkg portfile.
@@ -104,13 +97,53 @@ Enjoy!
   add 0-byte to canary; upstream CPython fixes; reduce .bss size; allow fixed TLS slot on Windows for improved performance.
 * 2024-05-21, `v1.8.7`, `v2.1.7`: Fix build issues on less common platforms. Started upstreaming patches
   from the CPython [integration](https://github.com/python/cpython/issues/113141#issuecomment-2119255217). Upstream `vcpkg` patches.
+* 2024-05-13, `v1.8.6`, `v2.1.6`: Fix build errors on various (older) platforms. Refactored aligned allocation.
+* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds.
+  Freeing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size
+  directly available (and new `block_size_shift` to improve aligned block freeing).
+  New approach to collection of abandoned segments: when
+  a thread terminates, the segments it owns are abandoned (containing still live objects) and these can be
+  reclaimed by other threads. We no longer use a list of abandoned segments but this is now done using bitmaps in arenas,
+  which is more concurrent (and more aggressive). Abandoned memory can now also be reclaimed if a thread frees an object in
+  an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). The option `mi_option_max_segment_reclaim`
+  gives a maximum percentage of abandoned segments that can be reclaimed per try (=10%).
+
+* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on FreeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity
+  by removing regions and segment caches and only use arenas with improved memory purging -- this may improve memory
+  usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking.
+
+* 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms.
+
+* 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support for dynamic overriding on Windows 11. Improved tracing precision
+  with [asan](#asan) and [Valgrind](#valgrind), and added Windows event tracing [ETW](#ETW) (contributed by Xinglong He). Created an OS
+  abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes.
+
+* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support.
+  Support arbitrary large alignments (in particular for `std::pmr` pools).
+  Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev).
+  Heap walks now visit all objects (including huge objects). Support Windows nano server containers (by Johannes Schindelin, @dscho).
+  Various small bug fixes.
+
+* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow
+  detection. Initial
+  support for attaching heaps to a specific memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`.
+
+* 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation
+  even when compiling with older SDKs, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix
+  warnings under Clang 14, improve performance if many OS threads are created and destroyed, fix statistics for large object
+  allocations, using MIMALLOC_VERBOSE=1 has no maximum on the number of error messages, various small fixes.
+
+* 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on
+  Windows 11, fix compilation with musl, potentially reduced
+  committed memory, add `bin/minject` for Windows,
+  improved wasm support, faster aligned allocation,
+  various small fixes.
 
 * [Older release notes](#older-release-notes)
 
 Special thanks to:
-* Sergiy Kuryata for his contributions on reducing memory commit -- especially on Windows with the Windows thread pool (now implemented in v3).
-* [David Carlier](https://devnexen.blogspot.com/) (@devnexen) for his _many_ contributions, and making
+* [David Carlier](https://devnexen.blogspot.com/) (@devnexen) for his many contributions, and making
   mimalloc work better on many less common operating systems, like Haiku, Dragonfly, etc.
 * Mary Feofanova (@mary3000), Evgeniy Moiseenko, and Manuel Pöter (@mpoeter) for making mimalloc TSAN checkable, and finding
   memory model bugs using the [genMC] model checker.
@@ -141,7 +174,7 @@ mimalloc is used in various large scale low-latency services and programs, for e
 
 Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build.
 The `mimalloc-lib` project builds a static library (in `out/msvc-x64`), while the
-`mimalloc-override-dll` project builds a DLL for overriding malloc
+`mimalloc-override-dll` project builds a DLL for overriding malloc
We no longer use a list of abandoned segments but this is now done using bitmaps in arena's - which is more concurrent (and more aggressive). Abandoned memory can now also be reclaimed if a thread frees an object in - an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). The option `mi_option_max_segment_reclaim` - gives a maximum percentage of abandoned segments that can be reclaimed per try (=10%). - -* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity - by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory - usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking. - -* 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms. - -* 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support dynamic overriding on Windows 11. Improved tracing precision - with [asan](#asan) and [Valgrind](#valgrind), and added Windows event tracing [ETW](#ETW) (contributed by Xinglong He). Created an OS - abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes. - -* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support. - Support arbitrary large alignments (in particular for `std::pmr` pools). - Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev). - Heap walks now visit all object (including huge objects). Support Windows nano server containers (by Johannes Schindelin,@dscho). - Various small bug fixes. - -* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow - detection. Initial - support for attaching heaps to a specific memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . - -* 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation - even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix - warnings under Clang 14, improve performance if many OS threads are created and destroyed, fix statistics for large object - allocations, using MIMALLOC_VERBOSE=1 has no maximum on the number of error messages, various small fixes. - -* 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on - Windows 11, fix compilation with musl, potentially reduced - committed memory, add `bin/minject` for Windows, - improved wasm support, faster aligned allocation, - various small fixes. - * 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including M1), improved performance for v2 for large objects, Python integration improvements, more standard installation directories, various small fixes. 
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 3d3202eb..d0e691b3 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -115,7 +115,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t // now zero the block if needed if (alignment > MI_BLOCK_ALIGNMENT_MAX) { - // for the tracker, on huge aligned allocations only the memory from the start of the large block is defined + // for the tracker, on huge aligned allocations only from the start of the large block is defined mi_track_mem_undefined(aligned_p, size); if (zero) { _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p)); @@ -191,6 +191,9 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0; if mi_likely(is_aligned) { + #if MI_STAT>1 + mi_heap_stat_increase(heap, malloc_requested, size); + #endif void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); @@ -217,11 +220,6 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, return mi_heap_malloc_aligned_at(heap, size, alignment, 0); } -// ensure a definition is emitted -#if defined(__cplusplus) -void* _mi_extern_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned; -#endif - // ------------------------------------------------------ // Aligned Allocation // ------------------------------------------------------ diff --git a/src/alloc-override.c b/src/alloc-override.c index 52ab69c5..b5109ded 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -71,20 +71,24 @@ typedef void* mi_nothrow_t; #define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } #define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) - #define MI_INTERPOSE_DECLS(name) __attribute__((used)) static struct mi_interpose_s name[] __attribute__((section("__DATA, __interpose"))) - - MI_INTERPOSE_DECLS(_mi_interposes) = + __attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) = { MI_INTERPOSE_MI(malloc), MI_INTERPOSE_MI(calloc), MI_INTERPOSE_MI(realloc), MI_INTERPOSE_MI(strdup), + #if defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7 + MI_INTERPOSE_MI(strndup), + #endif MI_INTERPOSE_MI(realpath), MI_INTERPOSE_MI(posix_memalign), MI_INTERPOSE_MI(reallocf), MI_INTERPOSE_MI(valloc), MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked), MI_INTERPOSE_MI(malloc_good_size), + #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 + MI_INTERPOSE_MI(aligned_alloc), + #endif #ifdef MI_OSX_ZONE // we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely MI_INTERPOSE_MI(free), @@ -95,12 +99,6 @@ typedef void* mi_nothrow_t; MI_INTERPOSE_FUN(vfree,mi_cfree), #endif }; - MI_INTERPOSE_DECLS(_mi_interposes_10_7) __OSX_AVAILABLE(10.7) = { - MI_INTERPOSE_MI(strndup), - }; - MI_INTERPOSE_DECLS(_mi_interposes_10_15) __OSX_AVAILABLE(10.15) = { - MI_INTERPOSE_MI(aligned_alloc), - }; #ifdef __cplusplus extern "C" { diff --git a/src/alloc.c b/src/alloc.c index 0fed5e75..15867315 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -30,7 +30,6 @@ terms of the MIT license. 
A copy of the license can be found in the file // Note: in release mode the (inlined) routine is about 7 instructions with a single test. extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { - mi_assert_internal(size >= MI_PADDING_SIZE); mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size); // check the free list @@ -83,13 +82,12 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_ #if (MI_STAT>0) const size_t bsize = mi_page_usable_block_size(page); - if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_increase(heap, malloc_normal, bsize); mi_heap_stat_counter_increase(heap, malloc_normal_count, 1); #if (MI_STAT>1) const size_t bin = _mi_bin(bsize); mi_heap_stat_increase(heap, malloc_bins[bin], 1); - mi_heap_stat_increase(heap, malloc_requested, size - MI_PADDING_SIZE); #endif } #endif @@ -148,6 +146,12 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero); mi_track_malloc(p,size,zero); + #if MI_STAT>1 + if (p != NULL) { + if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } + mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p)); + } + #endif #if MI_DEBUG>3 if (p != NULL && zero) { mi_assert_expensive(mi_mem_is_zero(p, size)); @@ -184,6 +188,12 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic mi_track_malloc(p,size,zero); + #if MI_STAT>1 + if (p != NULL) { + if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } + mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p)); + } + #endif #if MI_DEBUG>3 if (p != NULL && zero) { mi_assert_expensive(mi_mem_is_zero(p, size)); @@ -630,7 +640,7 @@ static void* mi_block_ptr_set_guarded(mi_block_t* block, size_t obj_size) { // give up to place it right in front of the guard page if the offset is too large for unalignment offset = MI_BLOCK_ALIGNMENT_MAX; } - void* p = (uint8_t*)block + offset; + void* p = (uint8_t*)block + offset; mi_track_align(block, p, offset, obj_size); mi_track_mem_defined(block, sizeof(mi_block_t)); return p; @@ -652,12 +662,11 @@ mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, boo void* const p = mi_block_ptr_set_guarded(block, obj_size); // stats - mi_track_malloc(p, size, zero); + mi_track_malloc(p, size, zero); if (p != NULL) { if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } #if MI_STAT>1 - mi_heap_stat_adjust_decrease(heap, malloc_requested, req_size); - mi_heap_stat_increase(heap, malloc_requested, size); + mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p)); #endif _mi_stat_counter_increase(&heap->tld->stats.malloc_guarded_count, 1); } @@ -685,7 +694,7 @@ void* _mi_externs[] = { (void*)&mi_zalloc_small, (void*)&mi_heap_malloc, (void*)&mi_heap_zalloc, - (void*)&mi_heap_malloc_small, + (void*)&mi_heap_malloc_small // (void*)&mi_heap_alloc_new, // (void*)&mi_heap_alloc_new_n }; diff --git a/src/arena.c b/src/arena.c index e97ca885..9d40a271 100644 --- a/src/arena.c +++ b/src/arena.c @@ -44,7 +44,7 @@ typedef struct mi_arena_s { mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited 
_Atomic(size_t) search_idx; // optimization to start the search for free blocks _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be purged from `blocks_purge`. - + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) @@ -99,10 +99,6 @@ bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_i } } -bool _mi_arena_memid_is_os_allocated(mi_memid_t memid) { - return (memid.memkind == MI_MEM_OS); -} - size_t mi_arena_get_count(void) { return mi_atomic_load_relaxed(&mi_arena_count); } @@ -192,9 +188,14 @@ void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid) { if (p != NULL) return p; // or fall back to the OS - p = _mi_os_zalloc(size, memid); + p = _mi_os_alloc(size, memid); if (p == NULL) return NULL; + // zero the OS memory if needed + if (!memid->initially_zero) { + _mi_memzero_aligned(p, size); + memid->initially_zero = true; + } return p; } @@ -254,7 +255,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar // set the dirty bits (todo: no need for an atomic op here?) if (arena->memid.initially_zero && arena->blocks_dirty != NULL) { - memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL, NULL); + memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); } // set commit state @@ -265,36 +266,21 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar else if (commit) { // commit requested, but the range may not be committed as a whole: ensure it is committed now memid->initially_committed = true; - const size_t commit_size = mi_arena_block_size(needed_bcount); bool any_uncommitted; - size_t already_committed = 0; - _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted, &already_committed); + _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { - mi_assert_internal(already_committed < needed_bcount); - const size_t stat_commit_size = commit_size - mi_arena_block_size(already_committed); bool commit_zero = false; - if (!_mi_os_commit_ex(p, commit_size, &commit_zero, stat_commit_size)) { + if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero)) { memid->initially_committed = false; } else { if (commit_zero) { memid->initially_zero = true; } } } - else { - // all are already committed: signal that we are reusing memory in case it was purged before - _mi_os_reuse( p, commit_size ); - } } else { // no need to commit, but check if already fully committed - size_t already_committed = 0; - memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &already_committed); - if (!memid->initially_committed && already_committed > 0) { - // partially committed: as it will be committed at some time, adjust the stats and pretend the range is fully uncommitted. 
- mi_assert_internal(already_committed < needed_bcount); - _mi_stat_decrease(&_mi_stats_main.committed, mi_arena_block_size(already_committed)); - _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); - } + memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } return p; @@ -368,7 +354,7 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, siz static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t *arena_id) { if (_mi_preloading()) return false; // use OS only while pre loading - + const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); if (arena_count > (MI_MAX_ARENAS - 4)) return false; @@ -410,7 +396,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (!mi_option_is_enabled(mi_option_disallow_arena_alloc)) { // is arena allocation allowed? - if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) + if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid); if (p != NULL) return p; @@ -478,19 +464,17 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks) const size_t size = mi_arena_block_size(blocks); void* const p = mi_arena_block_start(arena, bitmap_idx); bool needs_recommit; - size_t already_committed = 0; - if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx, &already_committed)) { + if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) { // all blocks are committed, we can purge freely - mi_assert_internal(already_committed == blocks); needs_recommit = _mi_os_purge(p, size); } else { // some blocks are not committed -- this can happen when a partially committed block is freed // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge - // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory). - mi_assert_internal(already_committed < blocks); + // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory), + // and also undo the decommit stats (as it was already adjusted) mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); - needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, mi_arena_block_size(already_committed)); + needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, 0); } // clear the purged blocks @@ -524,7 +508,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t else { // already an expiration was set } - _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL, NULL); + _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL); } } @@ -559,7 +543,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) { // check pre-conditions if (arena->memid.is_pinned) return false; - + // expired yet? 
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
   if (!force && (expire == 0 || expire > now)) return false;
@@ -614,7 +598,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force)
   return any_purged;
 }
 
-static void mi_arenas_try_purge( bool force, bool visit_all )
+static void mi_arenas_try_purge( bool force, bool visit_all )
 {
@@ -631,7 +615,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all )
   mi_atomic_guard(&purge_guard)
   {
     // increase global expire: at most one purge per delay cycle
-    mi_atomic_storei64_release(&mi_arenas_purge_expire, now + mi_arena_purge_delay());
+    mi_atomic_storei64_release(&mi_arenas_purge_expire, now + mi_arena_purge_delay());
     size_t max_purge_count = (visit_all ? max_arena : 2);
     bool all_visited = true;
     for (size_t i = 0; i < max_arena; i++) {
@@ -664,16 +648,15 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
   if (p==NULL) return;
   if (size==0) return;
   const bool all_committed = (committed_size == size);
-  const size_t decommitted_size = (committed_size <= size ? size - committed_size : 0);
 
   // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.)
   mi_track_mem_undefined(p,size);
 
   if (mi_memkind_is_os(memid.memkind)) {
     // was a direct OS allocation, pass through
-    if (!all_committed && decommitted_size > 0) {
-      // if partially committed, adjust the committed stats (as `_mi_os_free` will decrease commit by the full size)
-      _mi_stat_increase(&_mi_stats_main.committed, decommitted_size);
+    if (!all_committed && committed_size > 0) {
+      // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size)
+      _mi_stat_decrease(&_mi_stats_main.committed, committed_size);
     }
     _mi_os_free(p, size, memid);
   }
@@ -707,14 +690,14 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
     mi_assert_internal(arena->blocks_purge != NULL);
 
     if (!all_committed) {
-      // mark the entire range as no longer committed (so we will recommit the full range when re-using)
+      // mark the entire range as no longer committed (so we recommit the full range when re-using)
       _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
       mi_track_mem_noaccess(p,size);
-      //if (committed_size > 0) {
+      if (committed_size > 0) {
       // if partially committed, adjust the committed stats (is it will be recommitted when re-using)
-      // in the delayed purge, we do no longer decrease the commit if the range is not marked entirely as committed.
+      // in the delayed purge, we must not count a decommit if the range is not marked as committed.
       _mi_stat_decrease(&_mi_stats_main.committed, committed_size);
-      //}
+      }
      // note: if not all committed, it may be that the purge will reset/decommit the entire range
      // that contains already decommitted parts. Since purge consistently uses reset or decommit that
      // works (as we should never reset decommitted parts).
@@ -950,7 +933,7 @@ void mi_debug_show_arenas(void) mi_attr_noexcept {
   for (size_t i = 0; i < max_arenas; i++) {
     mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
     if (arena == NULL) break;
-    _mi_message("arena %zu: %zu blocks of size %zuMiB (in %zu fields) %s\n", i, arena->block_count, (size_t)(MI_ARENA_BLOCK_SIZE / MI_MiB), arena->field_count, (arena->memid.is_pinned ? 
", pinned" : "")); + _mi_message("arena %zu: %zu blocks of size %zuMiB (in %zu fields) %s\n", i, arena->block_count, MI_ARENA_BLOCK_SIZE / MI_MiB, arena->field_count, (arena->memid.is_pinned ? ", pinned" : "")); if (show_inuse) { inuse_total += mi_debug_show_bitmap(" ", "inuse blocks", arena->block_count, arena->blocks_inuse, arena->field_count); } @@ -1010,17 +993,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t if (pages == 0) return 0; // pages per numa node - int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count()); - if (numa_count == 0) numa_count = 1; + size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); + if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); // reserve evenly among numa nodes - for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if ((size_t)numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); + if (numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; diff --git a/src/bitmap.c b/src/bitmap.c index 32d1e954..9ef784d6 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -34,17 +34,17 @@ static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) { } + /* ----------------------------------------------------------- Claim a bit sequence atomically ----------------------------------------------------------- */ // Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. -inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); mi_assert_internal(count <= MI_BITMAP_FIELD_BITS); - mi_assert_internal(count > 0); mi_bitmap_field_t* field = &bitmap[idx]; size_t map = mi_atomic_load_relaxed(field); if (map==MI_BITMAP_FIELD_FULL) return false; // short cut @@ -94,9 +94,9 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons return false; } -// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. + // Starts at idx, and wraps around to search in all `bitmap_fields` fields. -// `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. 
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { size_t idx = start_field_idx; for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { @@ -108,24 +108,6 @@ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fiel return false; } -// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled -bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, - const size_t start_field_idx, const size_t count, - mi_bitmap_pred_fun_t pred_fun, void* pred_arg, - mi_bitmap_index_t* bitmap_idx) { - size_t idx = start_field_idx; - for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) idx = 0; // wrap - if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { - if (pred_fun == NULL || pred_fun(*bitmap_idx, pred_arg)) { - return true; - } - // predicate returned false, unclaim and look further - _mi_bitmap_unclaim(bitmap, bitmap_fields, count, *bitmap_idx); - } - } - return false; -} // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. @@ -246,7 +228,7 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit // intermediate fields while (++field < final_field) { - newmap = MI_BITMAP_FIELD_FULL; + newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); map = 0; if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; } } @@ -268,7 +250,7 @@ rollback: // (we just failed to claim `field` so decrement first) while (--field > initial_field) { newmap = 0; - map = MI_BITMAP_FIELD_FULL; + map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); mi_assert_internal(mi_atomic_load_relaxed(field) == map); mi_atomic_store_release(field, newmap); } @@ -369,7 +351,7 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set) { +bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) { size_t idx = mi_bitmap_index_field(bitmap_idx); size_t pre_mask; size_t mid_mask; @@ -377,31 +359,28 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_zero = true; bool any_zero = false; - size_t one_count = 0; _Atomic(size_t)*field = &bitmap[idx]; size_t prev = mi_atomic_or_acq_rel(field++, pre_mask); - if ((prev & pre_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & pre_mask); } + if ((prev & pre_mask) != 0) all_zero = false; if ((prev & pre_mask) != pre_mask) any_zero = true; while (mid_count-- > 0) { prev = mi_atomic_or_acq_rel(field++, mid_mask); - if ((prev & mid_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & mid_mask); } + if ((prev & mid_mask) != 0) all_zero = false; if ((prev & mid_mask) != mid_mask) any_zero = true; } if (post_mask!=0) { prev = mi_atomic_or_acq_rel(field, post_mask); - if ((prev & post_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & post_mask); } + if ((prev & post_mask) != 0) all_zero = false; if ((prev & post_mask) != post_mask) any_zero = true; } if (pany_zero != NULL) { *pany_zero = any_zero; } - if (already_set != NULL) { *already_set = one_count; }; - mi_assert_internal(all_zero ? one_count == 0 : one_count <= count); return all_zero; } // Returns `true` if all `count` bits were 1. // `any_ones` is `true` if there was at least one bit set to one. -static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones, size_t* already_set) { +static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) { size_t idx = mi_bitmap_index_field(bitmap_idx); size_t pre_mask; size_t mid_mask; @@ -409,33 +388,30 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_ones = true; bool any_ones = false; - size_t one_count = 0; mi_bitmap_field_t* field = &bitmap[idx]; size_t prev = mi_atomic_load_relaxed(field++); if ((prev & pre_mask) != pre_mask) all_ones = false; - if ((prev & pre_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & pre_mask); } + if ((prev & pre_mask) != 0) any_ones = true; while (mid_count-- > 0) { prev = mi_atomic_load_relaxed(field++); if ((prev & mid_mask) != mid_mask) all_ones = false; - if ((prev & mid_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & mid_mask); } + if ((prev & mid_mask) != 0) any_ones = true; } if (post_mask!=0) { prev = mi_atomic_load_relaxed(field); if ((prev & post_mask) != post_mask) all_ones = false; - if ((prev & post_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & post_mask); } + if ((prev & post_mask) != 0) any_ones = true; } if (pany_ones != NULL) { *pany_ones = any_ones; } - if (already_set != NULL) { *already_set = one_count; } - mi_assert_internal(all_ones ? 
one_count == count : one_count < count); return all_ones; } -bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set) { - return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL, already_set); +bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL); } bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { bool any_ones; - mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones, NULL); + mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); return any_ones; } diff --git a/src/bitmap.h b/src/bitmap.h index 0f4744f4..d60668cb 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -44,11 +44,6 @@ static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx return mi_bitmap_index_create_ex(idx,bitidx); } -// Create a bit index. -static inline mi_bitmap_index_t mi_bitmap_index_create_from_bit(size_t full_bitidx) { - return mi_bitmap_index_create(full_bitidx / MI_BITMAP_FIELD_BITS, full_bitidx % MI_BITMAP_FIELD_BITS); -} - // Get the field index from a bit index. static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { return (bitmap_idx / MI_BITMAP_FIELD_BITS); @@ -76,10 +71,6 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_ // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); -// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled -typedef bool (mi_cdecl *mi_bitmap_pred_fun_t)(mi_bitmap_index_t bitmap_idx, void* pred_arg); -bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_pred_fun_t pred_fun, void* pred_arg, mi_bitmap_index_t* bitmap_idx); - // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); @@ -111,9 +102,9 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
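The `_across` variants above handle ranges that straddle `size_t` fields by splitting them into a leading partial mask, zero or more full middle fields, and a trailing partial mask (the `pre_mask`/`mid_mask`/`post_mask` computed by `mi_bitmap_mask_across`). A worked example, assuming 64-bit fields:

```c
// Illustrative only: a claim of count = 70 bits starting at absolute bit 60
// (i.e. field 0, bit 60) splits into three masks:
//
//   field 0: pre_mask  = 0xF000000000000000  -> bits 60..63  ( 4 bits)
//   field 1: mid_mask  = 0xFFFFFFFFFFFFFFFF  -> bits  0..63  (64 bits)
//   field 2: post_mask = 0x0000000000000003  -> bits  0..1   ( 2 bits)
//
// 4 + 64 + 2 == 70 == count; each field is then updated with one atomic OR.
```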
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set); +bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero); -bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set); +bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); #endif diff --git a/src/free.c b/src/free.c index 5e5ae443..a1732e8c 100644 --- a/src/free.c +++ b/src/free.c @@ -35,9 +35,7 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool mi_check_padding(page, block); if (track_stats) { mi_stat_free(page, block); } #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN && !MI_GUARDED - if (!mi_page_is_huge(page)) { // huge page content may be already decommitted - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); - } + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif if (track_stats) { mi_track_free_size(block, mi_page_usable_size_of(page, block)); } // faster then mi_usable_size as we already know the page and that p is unaligned @@ -123,16 +121,10 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms #if (MI_DEBUG>0) if mi_unlikely(!mi_is_in_heap_region(p)) { - #if (MI_INTPTR_SIZE == 8 && defined(__linux__)) - if (((uintptr_t)p >> 40) != 0x7F) { // linux tends to align large blocks above 0x7F000000000 (issue #640) - #else - { - #endif - _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" - "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); - if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { - _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); - } + _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" + "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); + if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { + _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); } } #endif @@ -280,7 +272,7 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* seg // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection _mi_padding_shrink(page, block, sizeof(mi_block_t)); - if (segment->kind == MI_SEGMENT_HUGE) { + if (segment->page_kind == MI_PAGE_HUGE) { #if MI_HUGE_PAGE_ABANDON // huge page segments are always abandoned and can be freed immediately _mi_segment_huge_page_free(segment, page, block); @@ -348,10 +340,7 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { void mi_free_size(void* p, size_t size) mi_attr_noexcept { MI_UNUSED_RELEASE(size); - #if MI_DEBUG - const size_t available = _mi_usable_size(p,"mi_free_size"); - mi_assert(p == NULL || size <= available || available == 0 /* invalid pointer */ ); - #endif + mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); mi_free(p); } @@ -525,24 +514,24 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { // only maintain stats for smaller objects if requested #if (MI_STAT>0) static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { +#if 
(MI_STAT < 2) MI_UNUSED(block); +#endif mi_heap_t* const heap = mi_heap_get_default(); const size_t bsize = mi_page_usable_block_size(page); - // #if (MI_STAT>1) - // const size_t usize = mi_page_usable_size_of(page, block); - // mi_heap_stat_decrease(heap, malloc_requested, usize); - // #endif - if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { +#if (MI_STAT>1) + const size_t usize = mi_page_usable_size_of(page, block); + mi_heap_stat_decrease(heap, malloc_requested, usize); +#endif + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, malloc_normal, bsize); - #if (MI_STAT > 1) +#if (MI_STAT > 1) mi_heap_stat_decrease(heap, malloc_bins[_mi_bin(bsize)], 1); - #endif +#endif } - //else if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - // mi_heap_stat_decrease(heap, malloc_large, bsize); - //} else { - mi_heap_stat_decrease(heap, malloc_huge, bsize); + const size_t bpsize = mi_page_block_size(page); // match stat in page.c:mi_huge_page_alloc + mi_heap_stat_decrease(heap, malloc_huge, bpsize); } } #else diff --git a/src/heap.c b/src/heap.c index f96e60d0..7c235a7b 100644 --- a/src/heap.c +++ b/src/heap.c @@ -95,11 +95,6 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); mi_collect_t collect = *((mi_collect_t*)arg_collect); _mi_page_free_collect(page, collect >= MI_FORCE); - if (collect == MI_FORCE) { - // note: call before a potential `_mi_page_free` as the segment may be freed if this was the last used page in that segment. - mi_segment_t* segment = _mi_page_segment(page); - _mi_segment_collect(segment, true /* force? */); - } if (mi_page_all_free(page)) { // no more used blocks, free the page. // note: this will free retired pages as well. @@ -132,15 +127,14 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) const bool is_main_thread = (_mi_is_main_thread() && heap->thread_id == _mi_thread_id()); // note: never reclaim on collect but leave it to threads that need storage to reclaim - const bool force_main = - #ifdef NDEBUG + if ( + #ifdef NDEBUG collect == MI_FORCE - #else + #else collect >= MI_FORCE - #endif - && is_main_thread && mi_heap_is_backing(heap) && !heap->no_reclaim; - - if (force_main) { + #endif + && is_main_thread && mi_heap_is_backing(heap) && !heap->no_reclaim) + { // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. // if all memory is freed by now, all segments should be freed. // note: this only collects in the current subprocess @@ -163,9 +157,8 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); - // collect abandoned segments (in particular, purge expired parts of segments in the abandoned segment list) - // note: forced purge can be quite expensive if many threads are created/destroyed so we do not force on abandonment - _mi_abandoned_collect(heap, collect == MI_FORCE /* force? 
*/, &heap->tld->segments); + // collect segments (purge pages, this can be expensive so don't force on abandonment) + _mi_segments_collect(collect == MI_FORCE, &heap->tld->segments); // if forced, collect thread data cache on program-exit (or shared library unload) if (force && is_main_thread && mi_heap_is_backing(heap)) { @@ -176,7 +169,9 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_arenas_collect(collect == MI_FORCE /* force purge? */); // merge statistics - if (collect <= MI_FORCE) { _mi_stats_merge_thread(heap->tld); } + if (collect <= MI_FORCE) { + mi_stats_merge(); + } } void _mi_heap_collect_abandon(mi_heap_t* heap) { @@ -333,26 +328,20 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ // stats const size_t bsize = mi_page_block_size(page); - if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) { - //if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - // mi_heap_stat_decrease(heap, malloc_large, bsize); - //} - //else - { - mi_heap_stat_decrease(heap, malloc_huge, bsize); - } + if (bsize > MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, malloc_huge, bsize); } - #if (MI_STAT>0) +#if (MI_STAT) _mi_page_free_collect(page, false); // update used count const size_t inuse = page->used; if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, malloc_normal, bsize * inuse); - #if (MI_STAT>1) +#if (MI_STAT>1) mi_heap_stat_decrease(heap, malloc_bins[_mi_bin(bsize)], inuse); - #endif +#endif } - // mi_heap_stat_decrease(heap, malloc_requested, bsize * inuse); // todo: off for aligned blocks... - #endif + mi_heap_stat_decrease(heap, malloc_requested, bsize * inuse); // todo: off for aligned blocks... +#endif /// pretend it is all free now mi_assert_internal(mi_page_thread_free(page) == NULL); diff --git a/src/init.c b/src/init.c index 3fc8b033..215eed20 100644 --- a/src/init.c +++ b/src/init.c @@ -34,12 +34,13 @@ const mi_page_t _mi_page_empty = { MI_ATOMIC_VAR_INIT(0), // xthread_free MI_ATOMIC_VAR_INIT(0), // xheap NULL, NULL - , { 0 } // padding + #if MI_INTPTR_SIZE==4 + , { NULL } + #endif }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) -#if (MI_SMALL_WSIZE_MAX==128) #if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8) #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } #elif (MI_PADDING>0) @@ -47,9 +48,7 @@ const mi_page_t _mi_page_empty = { #else #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } #endif -#else -#error "define right initialization sizes corresponding to MI_SMALL_WSIZE_MAX" -#endif + // Empty page queues for every bin #define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } @@ -64,8 +63,8 @@ const mi_page_t _mi_page_empty = { QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \ QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \ QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \ - QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 1 /* 655360, Huge queue */), \ - QNULL(MI_MEDIUM_OBJ_WSIZE_MAX + 2) /* Full queue */ } + QNULL(MI_LARGE_OBJ_WSIZE_MAX + 1 /* 655360, Huge queue */), \ + QNULL(MI_LARGE_OBJ_WSIZE_MAX + 2) /* Full queue */ } #define MI_STAT_COUNT_NULL() {0,0,0} @@ -87,18 +86,6 @@ const mi_page_t _mi_page_empty = { { MI_INIT74(MI_STAT_COUNT_NULL) }, \ { MI_INIT74(MI_STAT_COUNT_NULL) } - -// Empty slice span queues for every bin -#define 
SQNULL(sz) { NULL, NULL, sz } -#define MI_SEGMENT_SPAN_QUEUES_EMPTY \ - { SQNULL(1), \ - SQNULL( 1), SQNULL( 2), SQNULL( 3), SQNULL( 4), SQNULL( 5), SQNULL( 6), SQNULL( 7), SQNULL( 10), /* 8 */ \ - SQNULL( 12), SQNULL( 14), SQNULL( 16), SQNULL( 20), SQNULL( 24), SQNULL( 28), SQNULL( 32), SQNULL( 40), /* 16 */ \ - SQNULL( 48), SQNULL( 56), SQNULL( 64), SQNULL( 80), SQNULL( 96), SQNULL( 112), SQNULL( 128), SQNULL( 160), /* 24 */ \ - SQNULL( 192), SQNULL( 224), SQNULL( 256), SQNULL( 320), SQNULL( 384), SQNULL( 448), SQNULL( 512), SQNULL( 640), /* 32 */ \ - SQNULL( 768), SQNULL( 896), SQNULL( 1024) /* 35 */ } - - // -------------------------------------------------------- // Statically allocate an empty heap as the initial // thread local value for the default heap, @@ -108,7 +95,7 @@ const mi_page_t _mi_page_empty = { // may lead to allocation itself on some platforms) // -------------------------------------------------------- -mi_decl_cache_align const mi_heap_t _mi_heap_empty = { +mi_decl_hidden mi_decl_cache_align const mi_heap_t _mi_heap_empty = { NULL, MI_ATOMIC_VAR_INIT(NULL), 0, // tid @@ -123,23 +110,12 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { false, // can reclaim 0, // tag #if MI_GUARDED - 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`) + 0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`) #endif MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY }; -static mi_decl_cache_align mi_subproc_t mi_subproc_default; - -#define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats))) - -mi_decl_cache_align static const mi_tld_t tld_empty = { - 0, - false, - NULL, NULL, - { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &mi_subproc_default, tld_empty_stats }, // segments - { MI_STAT_VERSION, MI_STATS_NULL } // stats -}; mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { return _mi_prim_thread_id(); @@ -150,10 +126,15 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; extern mi_decl_hidden mi_heap_t _mi_heap_main; +static mi_decl_cache_align mi_subproc_t mi_subproc_default; + static mi_decl_cache_align mi_tld_t tld_main = { 0, false, - &_mi_heap_main, & _mi_heap_main, - { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &mi_subproc_default, &tld_main.stats }, // segments + &_mi_heap_main, &_mi_heap_main, + { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, + 0, 0, 0, 0, 0, &mi_subproc_default, + &tld_main.stats + }, // segments { MI_STAT_VERSION, MI_STATS_NULL } // stats }; @@ -172,7 +153,7 @@ mi_decl_cache_align mi_heap_t _mi_heap_main = { false, // can reclaim 0, // tag #if MI_GUARDED - 0, 0, 0, 0, + 0, 0, 0, 0, 0, #endif MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY @@ -184,14 +165,15 @@ mi_stats_t _mi_stats_main = { MI_STAT_VERSION, MI_STATS_NULL }; #if MI_GUARDED mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) { - heap->guarded_sample_rate = sample_rate; - heap->guarded_sample_count = sample_rate; // count down samples - if (heap->guarded_sample_rate > 1) { - if (seed == 0) { - seed = _mi_heap_random_next(heap); - } - heap->guarded_sample_count = (seed % heap->guarded_sample_rate) + 1; // start at random count between 1 and `sample_rate` + heap->guarded_sample_seed = seed; + if (heap->guarded_sample_seed == 0) { + heap->guarded_sample_seed = _mi_heap_random_next(heap); } + heap->guarded_sample_rate = sample_rate; + if (heap->guarded_sample_rate >= 1) { + heap->guarded_sample_seed = 
heap->guarded_sample_seed % heap->guarded_sample_rate; + } + heap->guarded_sample_count = heap->guarded_sample_seed; // count down samples } mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max) { @@ -244,6 +226,7 @@ mi_heap_t* _mi_heap_main_get(void) { return &_mi_heap_main; } + /* ----------------------------------------------------------- Sub process ----------------------------------------------------------- */ @@ -317,6 +300,7 @@ static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; static mi_thread_data_t* mi_thread_data_zalloc(void) { // try to find thread metadata in the cache + bool is_zero = false; mi_thread_data_t* td = NULL; for (int i = 0; i < TD_CACHE_SIZE; i++) { td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); @@ -324,25 +308,32 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) { // found cached allocation, try use it td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); if (td != NULL) { - _mi_memzero(td, offsetof(mi_thread_data_t,memid)); - return td; + break; } } } // if that fails, allocate as meta data - mi_memid_t memid; - td = (mi_thread_data_t*)_mi_os_zalloc(sizeof(mi_thread_data_t), &memid); if (td == NULL) { - // if this fails, try once more. (issue #257) - td = (mi_thread_data_t*)_mi_os_zalloc(sizeof(mi_thread_data_t), &memid); + mi_memid_t memid; + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid); if (td == NULL) { - // really out of memory - _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); - return NULL; + // if this fails, try once more. (issue #257) + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid); + if (td == NULL) { + // really out of memory + _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + } + } + if (td != NULL) { + td->memid = memid; + is_zero = memid.initially_zero; } } - td->memid = memid; + + if (td != NULL && !is_zero) { + _mi_memzero_aligned(td, offsetof(mi_thread_data_t,memid)); + } return td; } @@ -400,7 +391,7 @@ static bool _mi_thread_heap_init(void) { // initialize thread local data void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) { - _mi_memcpy_aligned(tld, &tld_empty, sizeof(mi_tld_t)); + _mi_memzero_aligned(tld,sizeof(mi_tld_t)); tld->heap_backing = bheap; tld->heaps = NULL; tld->segments.subproc = &mi_subproc_default; @@ -441,10 +432,7 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) { // free if not the main thread if (heap != &_mi_heap_main) { - // the following assertion does not always hold for huge segments as those are always treated - // as abondened: one may allocate it in one thread, but deallocate in another in which case - // the count can be too large or negative. todo: perhaps not count huge segments? 
see issue #363 - // mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id()); + mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id()); mi_thread_data_free((mi_thread_data_t*)heap); } else { @@ -577,7 +565,7 @@ mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { } // Called once by the process loader from `src/prim/prim.c` -void _mi_auto_process_init(void) { +void _mi_process_load(void) { mi_heap_main_init(); #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; @@ -659,13 +647,13 @@ void mi_process_init(void) mi_attr_noexcept { if (mi_option_is_enabled(mi_option_reserve_os_memory)) { long ksize = mi_option_get(mi_option_reserve_os_memory); if (ksize > 0) { - mi_reserve_os_memory((size_t)ksize*MI_KiB, true /* commit? */, true /* allow large pages? */); + mi_reserve_os_memory((size_t)ksize*MI_KiB, true, true); } } } -// Called when the process is done (cdecl as it is used with `at_exit` on some platforms) -void mi_cdecl mi_process_done(void) mi_attr_noexcept { +// Called when the process is done (through `at_exit`) +void mi_cdecl _mi_process_done(void) { // only shutdown if we were initialized if (!_mi_process_is_initialized) return; // ensure we are called once @@ -708,7 +696,3 @@ void mi_cdecl mi_process_done(void) mi_attr_noexcept { os_preloading = true; // don't call the C runtime anymore } -void mi_cdecl _mi_auto_process_done(void) mi_attr_noexcept { - if (_mi_option_get_fast(mi_option_destroy_on_exit)>1) return; - mi_process_done(); -} diff --git a/src/libc.c b/src/libc.c index 52d095eb..1bd97aa3 100644 --- a/src/libc.c +++ b/src/libc.c @@ -275,60 +275,3 @@ int _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) { va_end(args); return written; } - - -#if MI_SIZE_SIZE == 4 -#define mi_mask_even_bits32 (0x55555555) -#define mi_mask_even_pairs32 (0x33333333) -#define mi_mask_even_nibbles32 (0x0F0F0F0F) - -// sum of all the bytes in `x` if it is guaranteed that the sum < 256! -static size_t mi_byte_sum32(uint32_t x) { - // perform `x * 0x01010101`: the highest byte contains the sum of all bytes. - x += (x << 8); - x += (x << 16); - return (size_t)(x >> 24); -} - -static size_t mi_popcount_generic32(uint32_t x) { - // first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10 - // in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair - // into the lower bit-pair: - x = x - ((x >> 1) & mi_mask_even_bits32); - // add the 2-bit pair results - x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32); - // add the 4-bit nibble results - x = (x + (x >> 4)) & mi_mask_even_nibbles32; - // each byte now has a count of its bits, we can sum them now: - return mi_byte_sum32(x); -} - -mi_decl_noinline size_t _mi_popcount_generic(size_t x) { - return mi_popcount_generic32(x); -} - -#else -#define mi_mask_even_bits64 (0x5555555555555555) -#define mi_mask_even_pairs64 (0x3333333333333333) -#define mi_mask_even_nibbles64 (0x0F0F0F0F0F0F0F0F) - -// sum of all the bytes in `x` if it is guaranteed that the sum < 256! 
-static size_t mi_byte_sum64(uint64_t x) { - x += (x << 8); - x += (x << 16); - x += (x << 32); - return (size_t)(x >> 56); -} - -static size_t mi_popcount_generic64(uint64_t x) { - x = x - ((x >> 1) & mi_mask_even_bits64); - x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64); - x = (x + (x >> 4)) & mi_mask_even_nibbles64; - return mi_byte_sum64(x); -} - -mi_decl_noinline size_t _mi_popcount_generic(size_t x) { - return mi_popcount_generic64(x); -} -#endif - diff --git a/src/options.c b/src/options.c index af2a0e70..772dfe66 100644 --- a/src/options.c +++ b/src/options.c @@ -106,11 +106,11 @@ typedef struct mi_option_desc_s { static mi_option_desc_t options[_mi_option_last] = { // stable options - #if MI_DEBUG || defined(MI_SHOW_ERRORS) +#if MI_DEBUG || defined(MI_SHOW_ERRORS) { 1, UNINIT, MI_OPTION(show_errors) }, - #else +#else { 0, UNINIT, MI_OPTION(show_errors) }, - #endif +#endif { 0, UNINIT, MI_OPTION(show_stats) }, { MI_DEFAULT_VERBOSE, UNINIT, MI_OPTION(verbose) }, @@ -129,7 +129,7 @@ static mi_option_desc_t options[_mi_option_last] = UNINIT, MI_OPTION(reserve_os_memory) }, // reserve N KiB OS memory in advance (use `option_get_size`) { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free - { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge,abandoned_page_reset) }, // reset free page memory when a thread terminates + { 0, UNINIT, MI_OPTION(abandoned_page_purge) }, // purge free page memory when a thread terminates { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, // reset segment memory on free (needs eager commit) #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed @@ -425,14 +425,14 @@ static mi_decl_noinline void mi_recurse_exit_prim(void) { } static bool mi_recurse_enter(void) { - #if defined(__APPLE__) || defined(__ANDROID__) || defined(MI_TLS_RECURSE_GUARD) + #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) if (_mi_preloading()) return false; #endif return mi_recurse_enter_prim(); } static void mi_recurse_exit(void) { - #if defined(__APPLE__) || defined(__ANDROID__) || defined(MI_TLS_RECURSE_GUARD) + #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) if (_mi_preloading()) return; #endif mi_recurse_exit_prim(); @@ -525,7 +525,7 @@ void _mi_warning_message(const char* fmt, ...) { #if MI_DEBUG -mi_decl_noreturn mi_decl_cold void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) mi_attr_noexcept { +void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); abort(); } diff --git a/src/os.c b/src/os.c index 9b1b4b46..12cc5da3 100644 --- a/src/os.c +++ b/src/os.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
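
The `_mi_popcount_generic` fallback dropped from src/libc.c above is the classic SWAR reduction: count bits per 2-bit pair, then per nibble, then add the per-byte counts with shifted adds so the total lands in the top byte (safe because the sum is at most 64). A minimal standalone sketch of the 64-bit variant, with illustrative names rather than the file's (assumes a C11 compiler):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    // SWAR popcount: 2-bit pair counts, then nibble counts, then byte counts,
    // then shifted adds that accumulate all byte counts into the top byte.
    static size_t popcount64_sketch(uint64_t x) {
      x = x - ((x >> 1) & 0x5555555555555555ULL);                            // per pair
      x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);  // per nibble
      x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                            // per byte
      x += (x << 8); x += (x << 16); x += (x << 32);                         // byte sum
      return (size_t)(x >> 56);
    }

    int main(void) {
      assert(popcount64_sketch(0) == 0);
      assert(popcount64_sketch(~(uint64_t)0) == 64);
      assert(popcount64_sketch(0x8000000000000001ULL) == 2);
      return 0;
    }

This generic path is presumably only the fallback for compilers without a popcount builtin; hardware builds would use the intrinsic directly.
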
@@ -91,6 +91,21 @@ void _mi_os_init(void) { bool _mi_os_decommit(void* addr, size_t size); bool _mi_os_commit(void* addr, size_t size, bool* is_zero); +static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? + return (sz & ~mask); + } + else { + return ((sz / alignment) * alignment); + } +} + +static void* mi_align_down_ptr(void* p, size_t alignment) { + return (void*)_mi_align_down((uintptr_t)p, alignment); +} + /* ----------------------------------------------------------- aligned hinting @@ -152,8 +167,8 @@ static void mi_os_free_huge_os_pages(void* p, size_t size); static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) { mi_assert_internal((size % _mi_os_page_size()) == 0); - if (addr == NULL) return; // || _mi_os_is_huge_reserved(addr) - int err = _mi_prim_free(addr, size); // allow size==0 (issue #1041) + if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) + int err = _mi_prim_free(addr, size); if (err != 0) { _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } @@ -166,16 +181,15 @@ static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) { void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid) { if (mi_memkind_is_os(memid.memkind)) { size_t csize = memid.mem.os.size; - if (csize==0) { csize = _mi_os_good_alloc_size(size); } - mi_assert_internal(csize >= size); + if (csize==0) { _mi_os_good_alloc_size(size); } size_t commit_size = (still_committed ? csize : 0); void* base = addr; // different base? (due to alignment) if (memid.mem.os.base != base) { - mi_assert(memid.mem.os.base <= addr); + mi_assert(memid.mem.os.base <= addr); base = memid.mem.os.base; const size_t diff = (uint8_t*)addr - (uint8_t*)memid.mem.os.base; - if (memid.mem.os.size==0) { + if (memid.mem.os.size==0) { csize += diff; } if (still_committed) { @@ -286,10 +300,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only the aligned part if (commit) { - if (!_mi_os_commit(p, size, NULL)) { - mi_os_prim_free(*base, over_size, 0); - return NULL; - } + _mi_os_commit(p, size, NULL); } } else { // mmap can free inside an allocation @@ -327,11 +338,9 @@ void* _mi_os_alloc(size_t size, mi_memid_t* memid) { bool os_is_large = false; bool os_is_zero = false; void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero); - if (p == NULL) return NULL; - - *memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large); - mi_assert_internal(memid->mem.os.size >= size); - mi_assert_internal(memid->initially_committed); + if (p != NULL) { + *memid = _mi_memid_create_os(true, os_is_zero, os_is_large); + } return p; } @@ -347,42 +356,15 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo bool os_is_zero = false; void* os_base = NULL; void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base ); - if (p == NULL) return NULL; - - *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large); - memid->mem.os.base = os_base; - memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned? 
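
The `_mi_align_down` helper added in the hunk above rounds a value down to an alignment boundary, taking a single mask operation when the alignment is a power of two and falling back to integer division otherwise. The same logic as a self-contained sketch (illustrative names, not the file's):

    #include <assert.h>
    #include <stdint.h>

    // Round `v` down to a multiple of `alignment`: mask when the alignment is a
    // power of two ((alignment & (alignment-1)) == 0), divide otherwise.
    static uintptr_t align_down_sketch(uintptr_t v, uintptr_t alignment) {
      const uintptr_t mask = alignment - 1;
      if ((alignment & mask) == 0) {   // power of two?
        return v & ~mask;
      }
      return (v / alignment) * alignment;
    }

    int main(void) {
      assert(align_down_sketch(4097, 4096) == 4096);  // mask path
      assert(align_down_sketch(4096, 4096) == 4096);
      assert(align_down_sketch(1000, 24) == 984);     // division path: 41*24
      return 0;
    }

In practice the callers pass page-size alignments, so the mask path is the hot one; the division branch just keeps the helper total for arbitrary alignments.
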
- - mi_assert_internal(memid->mem.os.size >= size); - mi_assert_internal(_mi_is_aligned(p,alignment)); - if (commit) { mi_assert_internal(memid->initially_committed); } - return p; -} - - -mi_decl_nodiscard static void* mi_os_ensure_zero(void* p, size_t size, mi_memid_t* memid) { - if (p==NULL || size==0) return p; - // ensure committed - if (!memid->initially_committed) { - bool is_zero = false; - if (!_mi_os_commit(p, size, &is_zero)) { - _mi_os_free(p, size, *memid); - return NULL; - } - memid->initially_committed = true; + if (p != NULL) { + *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large); + memid->mem.os.base = os_base; + // memid->mem.os.alignment = alignment; + memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned } - // ensure zero'd - if (memid->initially_zero) return p; - _mi_memzero_aligned(p,size); - memid->initially_zero = true; return p; } -void* _mi_os_zalloc(size_t size, mi_memid_t* memid) { - void* p = _mi_os_alloc(size,memid); - return mi_os_ensure_zero(p, size, memid); -} - /* ----------------------------------------------------------- OS aligned allocation with an offset. This is used for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc @@ -528,17 +510,6 @@ bool _mi_os_reset(void* addr, size_t size) { } -void _mi_os_reuse( void* addr, size_t size ) { - // page align conservatively within the range - size_t csize = 0; - void* const start = mi_os_page_align_area_conservative(addr, size, &csize); - if (csize == 0) return; - const int err = _mi_prim_reuse(start, csize); - if (err != 0) { - _mi_warning_message("cannot reuse OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); - } -} - // either resets or decommits memory, returns true if the memory needs // to be recommitted if it is to be re-used later on. bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size) @@ -548,7 +519,7 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size) mi_os_stat_increase(purged, size); if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? - !_mi_preloading()) // don't decommit during preloading (unsafe) + !_mi_preloading()) // don't decommit during preloading (unsafe) { bool needs_recommit = true; mi_os_decommit_ex(p, size, &needs_recommit, stat_size); @@ -568,6 +539,7 @@ bool _mi_os_purge(void* p, size_t size) { return _mi_os_purge_ex(p, size, true, size); } + // Protect a region in memory to be not accessible. 
static bool mi_os_protectx(void* addr, size_t size, bool protect) { // page align conservatively within the range @@ -646,7 +618,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; - uint8_t* const start = mi_os_claim_huge_pages(pages, &size); + uint8_t* start = mi_os_claim_huge_pages(pages, &size); if (start == NULL) return NULL; // or 32-bit systems // Allocate one page at the time but try to place them contiguously @@ -702,7 +674,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } if (page != 0) { mi_assert(start != NULL); - *memid = _mi_memid_create_os(start, size, true /* is committed */, all_zero, true /* is_large */); + *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */); memid->memkind = MI_MEM_OS_HUGE; mi_assert(memid->is_pinned); #ifdef MI_TRACK_ASAN @@ -724,47 +696,34 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) { } } - /* ---------------------------------------------------------------------------- Support NUMA aware allocation -----------------------------------------------------------------------------*/ -static _Atomic(size_t) mi_numa_node_count; // = 0 // cache the node count +_Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count -int _mi_os_numa_node_count(void) { - size_t count = mi_atomic_load_acquire(&mi_numa_node_count); - if mi_unlikely(count == 0) { +size_t _mi_os_numa_node_count_get(void) { + size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); + if (count <= 0) { long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? - if (ncount > 0 && ncount < INT_MAX) { + if (ncount > 0) { count = (size_t)ncount; } else { - const size_t n = _mi_prim_numa_node_count(); // or detect dynamically - if (n == 0 || n > INT_MAX) { count = 1; } - else { count = n; } + count = _mi_prim_numa_node_count(); // or detect dynamically + if (count == 0) count = 1; } - mi_atomic_store_release(&mi_numa_node_count, count); // save it + mi_atomic_store_release(&_mi_numa_node_count, count); // save it _mi_verbose_message("using %zd numa regions\n", count); } - mi_assert_internal(count > 0 && count <= INT_MAX); - return (int)count; + return count; } -static int mi_os_numa_node_get(void) { - int numa_count = _mi_os_numa_node_count(); +int _mi_os_numa_node_get(void) { + size_t numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - const size_t n = _mi_prim_numa_node(); - int numa_node = (n < INT_MAX ? (int)n : 0); + size_t numa_node = _mi_prim_numa_node(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - return numa_node; -} - -int _mi_os_numa_node(void) { - if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) { - return 0; - } - else { - return mi_os_numa_node_get(); - } + return (int)numa_node; } diff --git a/src/page-queue.c b/src/page-queue.c index c719b626..3507505d 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -12,7 +12,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #ifndef MI_IN_PAGE_C #error "this file should be included from 'page.c'" // include to help an IDE -#include "mimalloc.h" +#include "mimalloc.h" #include "mimalloc/internal.h" #include "mimalloc/atomic.h" #endif @@ -38,15 +38,15 @@ terms of the MIT license. A copy of the license can be found in the file static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) { - return (pq->block_size == (MI_MEDIUM_OBJ_SIZE_MAX+sizeof(uintptr_t))); + return (pq->block_size == (MI_LARGE_OBJ_SIZE_MAX+sizeof(uintptr_t))); } static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) { - return (pq->block_size == (MI_MEDIUM_OBJ_SIZE_MAX+(2*sizeof(uintptr_t)))); + return (pq->block_size == (MI_LARGE_OBJ_SIZE_MAX+(2*sizeof(uintptr_t)))); } static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { - return (pq->block_size > MI_MEDIUM_OBJ_SIZE_MAX); + return (pq->block_size > MI_LARGE_OBJ_SIZE_MAX); } /* ----------------------------------------------------------- @@ -58,7 +58,7 @@ static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { // We use `wsize` for the size in "machine word sizes", // i.e. byte size == `wsize*sizeof(void*)`. static inline size_t mi_bin(size_t size) { - size_t wsize = _mi_wsize_from_size(size); + size_t wsize = _mi_wsize_from_size(size); #if defined(MI_ALIGN4W) if mi_likely(wsize <= 4) { return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes @@ -72,7 +72,7 @@ static inline size_t mi_bin(size_t size) { return (wsize == 0 ? 1 : wsize); } #endif - else if mi_unlikely(wsize > MI_MEDIUM_OBJ_WSIZE_MAX) { + else if mi_unlikely(wsize > MI_LARGE_OBJ_WSIZE_MAX) { return MI_BIN_HUGE; } else { @@ -107,7 +107,7 @@ size_t _mi_bin_size(size_t bin) { // Good size for allocation size_t mi_good_size(size_t size) mi_attr_noexcept { - if (size <= MI_MEDIUM_OBJ_SIZE_MAX) { + if (size <= MI_LARGE_OBJ_SIZE_MAX) { return _mi_bin_size(mi_bin(size + MI_PADDING_SIZE)); } else { @@ -136,11 +136,7 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* } #endif -static inline bool mi_page_is_large_or_huge(const mi_page_t* page) { - return (mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_huge(page)); -} - -size_t _mi_page_bin(const mi_page_t* page) { +static size_t mi_page_bin(const mi_page_t* page) { const size_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? 
MI_BIN_HUGE : mi_bin(mi_page_block_size(page)))); mi_assert_internal(bin <= MI_BIN_FULL); return bin; @@ -148,10 +144,10 @@ size_t _mi_page_bin(const mi_page_t* page) { static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { mi_assert_internal(heap!=NULL); - const size_t bin = _mi_page_bin(page); + const size_t bin = mi_page_bin(page); mi_page_queue_t* pq = &heap->pages[bin]; mi_assert_internal((mi_page_block_size(page) == pq->block_size) || - (mi_page_is_large_or_huge(page) && mi_page_queue_is_huge(pq)) || + (mi_page_is_huge(page) && mi_page_queue_is_huge(pq)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(pq))); return pq; } @@ -214,11 +210,10 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(queue, page)); - mi_assert_internal(mi_page_block_size(page) == queue->block_size || - (mi_page_is_large_or_huge(page) && mi_page_queue_is_huge(queue)) || + mi_assert_internal(mi_page_block_size(page) == queue->block_size || + (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_heap_t* heap = mi_page_heap(page); - if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == queue->last) queue->last = page->prev; @@ -240,10 +235,10 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(!mi_page_queue_contains(queue, page)); #if MI_HUGE_PAGE_ABANDON - mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); + mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); #endif mi_assert_internal(mi_page_block_size(page) == queue->block_size || - (mi_page_is_large_or_huge(page) && mi_page_queue_is_huge(queue)) || + (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); @@ -282,8 +277,8 @@ static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t* mi_assert_internal((bsize == to->block_size && bsize == from->block_size) || (bsize == to->block_size && mi_page_queue_is_full(from)) || (bsize == from->block_size && mi_page_queue_is_full(to)) || - (mi_page_is_large_or_huge(page) && mi_page_queue_is_huge(to)) || - (mi_page_is_large_or_huge(page) && mi_page_queue_is_full(to))); + (mi_page_is_huge(page) && mi_page_queue_is_huge(to)) || + (mi_page_is_huge(page) && mi_page_queue_is_full(to))); mi_heap_t* heap = mi_page_heap(page); @@ -322,8 +317,8 @@ static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t* page->prev = to->first; page->next = next; to->first->next = page; - if (next != NULL) { - next->prev = page; + if (next != NULL) { + next->prev = page; } else { to->last = page; diff --git a/src/page.c b/src/page.c index a5a10503..6a693e89 100644 --- a/src/page.c +++ b/src/page.c @@ -37,7 +37,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); -static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); +static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); #if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, 
mi_block_t* head) { @@ -82,9 +82,11 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); + // const size_t bsize = mi_page_block_size(page); + mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = mi_page_start(page); - mi_assert_internal(start == _mi_segment_page_start(_mi_page_segment(page), page, NULL)); - mi_assert_internal(page->is_huge == (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL)); + mi_assert_internal(page->is_huge == (segment->page_kind == MI_PAGE_HUGE)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -112,7 +114,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { return true; } -extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? +extern bool _mi_process_is_initialized; // has mi_process_init been called? bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); @@ -121,15 +123,14 @@ bool _mi_page_is_valid(mi_page_t* page) { #endif if (mi_page_heap(page)!=NULL) { mi_segment_t* segment = _mi_page_segment(page); - - mi_assert_internal(!_mi_process_is_initialized || segment->thread_id==0 || segment->thread_id == mi_page_heap(page)->thread_id); + mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0); #if MI_HUGE_PAGE_ABANDON - if (segment->kind != MI_SEGMENT_HUGE) + if (segment->page_kind != MI_PAGE_HUGE) #endif { mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); - mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_in_full(page)); + mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq)); } } @@ -256,11 +257,10 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // called from segments when reclaiming abandoned pages void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); - mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); #if MI_HUGE_PAGE_ABANDON - mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); + mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); #endif // TODO: push on full queue immediately if it is full? 
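
Returning to the `mi_bin` mapping in src/page-queue.c above: the body of its final `else` branch falls outside the quoted hunks, but the page-queue table in src/init.c (8, 10, 12, 14, 16, 20, 24, 28, 32, ...) shows the intended shape — four size classes per power of two, which caps worst-case internal fragmentation at roughly 25%. A reconstruction of that style of mapping, offered as an assumption rather than a quote of the file's exact code (`wsize` is the size in machine words, here taken to be at least 9):

    #include <assert.h>
    #include <stddef.h>

    static size_t bsr_sketch(size_t x) {   // index of the highest set bit
      size_t b = 0;
      while (x >>= 1) b++;
      return b;
    }

    // Pseudo-logarithmic bins: the top bit picks the power of two and the
    // next two bits pick one of four subdivisions within it.
    static size_t bin_sketch(size_t wsize) {
      wsize--;
      const size_t b = bsr_sketch(wsize);
      return ((b << 2) + ((wsize >> (b - 2)) & 3)) - 3;
    }

    int main(void) {
      assert(bin_sketch(9) == bin_sketch(10));   // 9..10 share the "10" bin
      assert(bin_sketch(11) == bin_sketch(12));  // 11..12 share the "12" bin
      assert(bin_sketch(10) < bin_sketch(11));
      assert(bin_sketch(16) < bin_sketch(17));   // new power of two, new bin
      return 0;
    }
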
@@ -274,7 +274,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size #if !MI_HUGE_PAGE_ABANDON mi_assert_internal(pq != NULL); mi_assert_internal(mi_heap_contains_queue(heap, pq)); - mi_assert_internal(page_alignment > 0 || block_size > MI_MEDIUM_OBJ_SIZE_MAX || block_size == pq->block_size); + mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_OBJ_SIZE_MAX || block_size == pq->block_size); #endif mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments); if (page == NULL) { @@ -284,14 +284,13 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size #if MI_HUGE_PAGE_ABANDON mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); #endif - mi_assert_internal(page_alignment >0 || block_size > MI_MEDIUM_OBJ_SIZE_MAX || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE); mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); // a fresh page was found, initialize it const size_t full_block_size = (pq == NULL || mi_page_is_huge(page) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc mi_assert_internal(full_block_size >= block_size); mi_page_init(heap, page, full_block_size, heap->tld); mi_heap_stat_increase(heap, pages, 1); - mi_heap_stat_increase(heap, page_bins[_mi_page_bin(page)], 1); + mi_heap_stat_increase(heap, page_bins[mi_page_bin(page)], 1); if (pq != NULL) { mi_page_queue_push(heap, pq, page); } mi_assert_expensive(_mi_page_is_valid(page)); return page; @@ -427,7 +426,6 @@ void _mi_page_force_abandon(mi_page_t* page) { } } - // Free a page with no more free blocks void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_assert_internal(page != NULL); @@ -445,12 +443,13 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_segments_tld_t* segments_tld = &heap->tld->segments; mi_page_queue_remove(pq, page); - // and free it + // and free it + mi_heap_stat_decrease(heap, page_bins[mi_page_bin(page)], 1); mi_page_set_heap(page,NULL); _mi_segment_page_free(page, force, segments_tld); } -#define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE +#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE #define MI_RETIRE_CYCLES (16) // Retire a page with no more used blocks @@ -624,7 +623,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co #if (MI_SECURE>0) #define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many #else -#define MI_MIN_EXTEND (4) +#define MI_MIN_EXTEND (1) #endif // Extend the capacity (up to reserved) by initializing a free list @@ -632,15 +631,18 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // Note: we also experimented with "bump" allocation on the first // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) 
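
The comment above describes `mi_page_extend_free`: rather than building the whole free list up front, a page's capacity grows lazily by threading a batch of fresh blocks onto its free list. Stripped of the real `mi_page_free_list_extend`'s padding, secure-mode randomization, and capacity bookkeeping, the linking step is essentially this (simplified types; a sketch, not the file's code):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef struct block_s { struct block_s* next; } block_t;

    // Thread `extend` fresh blocks of `bsize` bytes starting at `start` into a
    // singly-linked free list; the last block points at the existing list.
    static block_t* free_list_extend_sketch(uint8_t* start, size_t bsize,
                                            size_t extend, block_t* old_free) {
      block_t* const head = (block_t*)start;
      block_t* b = head;
      for (size_t i = 1; i < extend; i++) {
        block_t* next = (block_t*)((uint8_t*)b + bsize);
        b->next = next;
        b = next;
      }
      b->next = old_free;   // usually NULL when extending
      return head;
    }

    int main(void) {
      _Alignas(block_t*) uint8_t area[4 * 32];  // pretend page area: 4 blocks of 32 bytes
      block_t* head = free_list_extend_sketch(area, 32, 4, NULL);
      size_t n = 0;
      for (block_t* b = head; b != NULL; b = b->next) n++;
      assert(n == 4);
      return 0;
    }
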
-static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { +static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); mi_assert(page->local_free == NULL); - if (page->free != NULL) return true; + if (page->free != NULL) return; #endif - if (page->capacity >= page->reserved) return true; + if (page->capacity >= page->reserved) return; + size_t page_size; + //uint8_t* page_start = + _mi_segment_page_start(_mi_page_segment(page), page, &page_size); mi_stat_counter_increase(tld->stats.pages_extended, 1); // calculate the extend count @@ -672,7 +674,6 @@ static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) page->capacity += (uint16_t)extend; mi_stat_increase(tld->stats.page_committed, extend * bsize); mi_assert_expensive(mi_page_is_valid_init(page)); - return true; } // Initialize a fresh page @@ -687,8 +688,6 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi size_t page_size; page->page_start = _mi_segment_page_start(segment, page, &page_size); mi_track_mem_noaccess(page->page_start,page_size); - mi_assert_internal(mi_page_block_size(page) <= page_size); - mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); mi_assert_internal(page->reserved > 0); @@ -703,7 +702,6 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(mi_mem_is_zero(page->page_start, page_size)); } #endif - mi_assert_internal(page->is_committed); if (block_size > 0 && _mi_is_power_of_two(block_size)) { page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size)); } @@ -727,10 +725,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list - if (mi_page_extend_free(heap,page,tld)) { - mi_assert(mi_page_immediate_available(page)); - } - return; + mi_page_extend_free(heap,page,tld); + mi_assert(mi_page_immediate_available(page)); } @@ -822,18 +818,13 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p if (page_candidate != NULL) { page = page_candidate; } - if (page != NULL) { - if (!mi_page_immediate_available(page)) { - mi_assert_internal(mi_page_is_expandable(page)); - if (!mi_page_extend_free(heap, page, heap->tld)) { - page = NULL; // failed to extend - } - } - mi_assert_internal(page == NULL || mi_page_immediate_available(page)); + if (page != NULL && !mi_page_immediate_available(page)) { + mi_assert_internal(mi_page_is_expandable(page)); + mi_page_extend_free(heap, page, heap->tld); } if (page == NULL) { - _mi_heap_collect_retired(heap, false); // perhaps make a page available? + _mi_heap_collect_retired(heap, false); // perhaps make a page available page = mi_page_fresh(heap, pq); if (page == NULL && first_try) { // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again @@ -911,47 +902,31 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex General allocation ----------------------------------------------------------- */ -// Large and huge page allocation. -// Huge pages contain just one block, and the segment contains just that page (as `MI_SEGMENT_HUGE`). 
+// Huge pages contain just one block, and the segment contains just that page. // Huge pages are also use if the requested alignment is very large (> MI_BLOCK_ALIGNMENT_MAX) // so their size is not always `> MI_LARGE_OBJ_SIZE_MAX`. -static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) { +static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) { size_t block_size = _mi_os_good_alloc_size(size); mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0); - bool is_huge = (block_size > MI_LARGE_OBJ_SIZE_MAX || page_alignment > 0); #if MI_HUGE_PAGE_ABANDON - mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size)); + mi_page_queue_t* pq = NULL; #else - mi_page_queue_t* pq = mi_page_queue(heap, is_huge ? MI_LARGE_OBJ_SIZE_MAX+1 : block_size); - mi_assert_internal(!is_huge || mi_page_queue_is_huge(pq)); + mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_OBJ_SIZE_MAX+1); // always in the huge queue regardless of the block size + mi_assert_internal(mi_page_queue_is_huge(pq)); #endif mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment); if (page != NULL) { + mi_assert_internal(mi_page_block_size(page) >= size); mi_assert_internal(mi_page_immediate_available(page)); - - if (is_huge) { - mi_assert_internal(mi_page_is_huge(page)); - mi_assert_internal(_mi_page_segment(page)->kind == MI_SEGMENT_HUGE); - mi_assert_internal(_mi_page_segment(page)->used==1); - #if MI_HUGE_PAGE_ABANDON - mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue - mi_page_set_heap(page, NULL); - #endif - } - else { - mi_assert_internal(!mi_page_is_huge(page)); - } - - const size_t bsize = mi_page_usable_block_size(page); // note: not `mi_page_block_size` to account for padding - /*if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_increase(heap, malloc_large, bsize); - mi_heap_stat_counter_increase(heap, malloc_large_count, 1); - } - else */ - { - _mi_stat_increase(&heap->tld->stats.malloc_huge, bsize); - _mi_stat_counter_increase(&heap->tld->stats.malloc_huge_count, 1); - } + mi_assert_internal(mi_page_is_huge(page)); + mi_assert_internal(_mi_page_segment(page)->page_kind == MI_PAGE_HUGE); + mi_assert_internal(_mi_page_segment(page)->used==1); + #if MI_HUGE_PAGE_ABANDON + mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue + mi_page_set_heap(page, NULL); + #endif + mi_heap_stat_increase(heap, malloc_huge, mi_page_block_size(page)); + mi_heap_stat_counter_increase(heap, malloc_huge_count, 1); } return page; } @@ -962,13 +937,13 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size, size_t static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept { // huge allocation? 
const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` - if mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) { + if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) { if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) { _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size); return NULL; } else { - return mi_large_huge_page_alloc(heap,size,huge_alignment); + return mi_huge_page_alloc(heap,size,huge_alignment); } } else { @@ -1004,9 +979,9 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // free delayed frees from other threads (but skip contended ones) _mi_heap_delayed_free_partial(heap); - + // collect every once in a while (10000 by default) - const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); + const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); if (heap->generic_collect_count >= generic_collect) { heap->generic_collect_count = 0; mi_heap_collect(heap, false /* force? */); diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c index c4cfc35d..82147de7 100644 --- a/src/prim/emscripten/prim.c +++ b/src/prim/emscripten/prim.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2025, Microsoft Research, Daan Leijen, Alon Zakai +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen, Alon Zakai This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -58,7 +58,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config) { extern void emmalloc_free(void*); int _mi_prim_free(void* addr, size_t size) { - if (size==0) return 0; + MI_UNUSED(size); emmalloc_free(addr); return 0; } @@ -114,11 +114,6 @@ int _mi_prim_reset(void* addr, size_t size) { return 0; } -int _mi_prim_reuse(void* addr, size_t size) { - MI_UNUSED(addr); MI_UNUSED(size); - return 0; -} - int _mi_prim_protect(void* addr, size_t size, bool protect) { MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); return 0; diff --git a/src/prim/prim.c b/src/prim/prim.c index 5147bae8..2002853f 100644 --- a/src/prim/prim.c +++ b/src/prim/prim.c @@ -39,29 +39,29 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_attr_destructor __attribute__((destructor)) #endif static void mi_attr_constructor mi_process_attach(void) { - _mi_auto_process_init(); + _mi_process_load(); } static void mi_attr_destructor mi_process_detach(void) { - _mi_auto_process_done(); + _mi_process_done(); } #elif defined(__cplusplus) // C++: use static initialization to detect process start/end // This is not guaranteed to be first/last but the best we can generally do? 
struct mi_init_done_t { mi_init_done_t() { - _mi_auto_process_init(); + _mi_process_load(); } ~mi_init_done_t() { - _mi_auto_process_done(); + _mi_process_done(); } }; static mi_init_done_t mi_init_done; #else - #pragma message("define a way to call _mi_auto_process_init/done on your platform") + #pragma message("define a way to call _mi_process_load/done on your platform") #endif #endif -// Generic allocator init/done callback +// Generic allocator init/done callback #ifndef MI_PRIM_HAS_ALLOCATOR_INIT bool _mi_is_redirected(void) { return false; diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 650aa657..8e3180e6 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2025, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -31,12 +31,11 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(__linux__) #include - #include // THP disable, PR_SET_VMA - #if defined(__GLIBC__) && !defined(PR_SET_VMA) - #include - #endif + //#if defined(MI_NO_THP) + #include // THP disable + //#endif #if defined(__GLIBC__) - #include // linux mmap flags + #include // linux mmap flags #else #include #endif @@ -70,8 +69,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MADV_FREE POSIX_MADV_FREE #endif -#define MI_UNIX_LARGE_PAGE_SIZE (2*MI_MiB) // TODO: can we query the OS for this? - + //------------------------------------------------------------------------------------ // Use syscalls for some primitives to allow for libraries that override open/read/close etc. // and do allocation themselves; using syscalls prevents recursion when mimalloc is @@ -157,7 +155,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) } #endif } - config->large_page_size = MI_UNIX_LARGE_PAGE_SIZE; + config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this? config->has_overcommit = unix_detect_overcommit(); config->has_partial_free = true; // mmap can free in parts config->has_virtual_reserve = true; // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE) @@ -187,7 +185,6 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) //--------------------------------------------- int _mi_prim_free(void* addr, size_t size ) { - if (size==0) return 0; bool err = (munmap(addr, size) == -1); return (err ? errno : 0); } @@ -208,24 +205,14 @@ static int unix_madvise(void* addr, size_t size, int advice) { return (res==0 ? 
0 : errno); } -static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) { - void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */); - #if defined(__linux__) && defined(PR_SET_VMA) - if (p!=MAP_FAILED && p!=NULL) { - prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc"); - } - #endif - return p; -} - -static void* unix_mmap_prim_aligned(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { +static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { MI_UNUSED(try_alignment); void* p = NULL; #if defined(MAP_ALIGNED) // BSD if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { size_t n = mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB - p = unix_mmap_prim(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd); + p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { int err = errno; _mi_trace_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr); @@ -236,7 +223,7 @@ static void* unix_mmap_prim_aligned(void* addr, size_t size, size_t try_alignmen } #elif defined(MAP_ALIGN) // Solaris if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - p = unix_mmap_prim((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd); // addr parameter is the required alignment + p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment if (p!=MAP_FAILED) return p; // fall back to regular mmap } @@ -246,7 +233,7 @@ static void* unix_mmap_prim_aligned(void* addr, size_t size, size_t try_alignmen if (addr == NULL) { void* hint = _mi_os_get_aligned_hint(try_alignment, size); if (hint != NULL) { - p = unix_mmap_prim(hint, size, protect_flags, flags, fd); + p = mmap(hint, size, protect_flags, flags, fd, 0); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { #if MI_TRACK_ENABLED // asan sometimes does not instrument errno correctly? 
int err = 0; @@ -261,7 +248,7 @@ static void* unix_mmap_prim_aligned(void* addr, size_t size, size_t try_alignmen } #endif // regular mmap - p = unix_mmap_prim(addr, size, protect_flags, flags, fd); + p = mmap(addr, size, protect_flags, flags, fd, 0); if (p!=MAP_FAILED) return p; // failed to allocate return NULL; @@ -332,7 +319,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; - p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd); + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) { mi_huge_pages_available = false; // don't try huge 1GiB pages again @@ -340,7 +327,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); } lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); - p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd); + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); } #endif if (large_only) return p; @@ -353,7 +340,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec // regular allocation if (p == NULL) { *is_large = false; - p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, flags, fd); + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, flags, fd); if (p != NULL) { #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead @@ -387,9 +374,6 @@ int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool comm mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); - if (hint_addr == NULL && size >= 8*MI_UNIX_LARGE_PAGE_SIZE && try_alignment > 1 && _mi_is_power_of_two(try_alignment) && try_alignment < MI_UNIX_LARGE_PAGE_SIZE) { - try_alignment = MI_UNIX_LARGE_PAGE_SIZE; // try to align along large page size for larger allocations - } *is_zero = true; int protect_flags = (commit ? 
(PROT_WRITE | PROT_READ) : PROT_NONE); @@ -414,6 +398,10 @@ static void unix_mprotect_hint(int err) { #endif } + + + + int _mi_prim_commit(void* start, size_t size, bool* is_zero) { // commit: ensure we can access the area // note: we may think that *is_zero can be true since the memory @@ -429,25 +417,11 @@ int _mi_prim_commit(void* start, size_t size, bool* is_zero) { return err; } -int _mi_prim_reuse(void* start, size_t size) { - MI_UNUSED(start); MI_UNUSED(size); - #if defined(__APPLE__) && defined(MADV_FREE_REUSE) - return unix_madvise(start, size, MADV_FREE_REUSE); - #endif - return 0; -} - int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; - #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) - // decommit on macOS: use MADV_FREE_REUSABLE as it does immediate rss accounting (issue #1097) - err = unix_madvise(start, size, MADV_FREE_REUSABLE); - if (err) { err = unix_madvise(start, size, MADV_DONTNEED); } - #else - // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) - err = unix_madvise(start, size, MADV_DONTNEED); - #endif - #if !MI_DEBUG && MI_SECURE<=2 + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + err = unix_madvise(start, size, MADV_DONTNEED); + #if !MI_DEBUG && !MI_SECURE *needs_recommit = false; #else *needs_recommit = true; @@ -464,22 +438,14 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { } int _mi_prim_reset(void* start, size_t size) { - int err = 0; - - // on macOS can use MADV_FREE_REUSABLE (but we disable this for now as it seems slower) - #if 0 && defined(__APPLE__) && defined(MADV_FREE_REUSABLE) - err = unix_madvise(start, size, MADV_FREE_REUSABLE); - if (err==0) return 0; - // fall through - #endif - - #if defined(MADV_FREE) - // Otherwise, we try to use `MADV_FREE` as that is the fastest. A drawback though is that it + // We try to use `MADV_FREE` as that is the fastest. A drawback though is that it // will not reduce the `rss` stats in tools like `top` even though the memory is available // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by // default `MADV_DONTNEED` is used though. + #if defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); + int err; while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on @@ -487,7 +453,7 @@ int _mi_prim_reset(void* start, size_t size) { err = unix_madvise(start, size, MADV_DONTNEED); } #else - err = unix_madvise(start, size, MADV_DONTNEED); + int err = unix_madvise(start, size, MADV_DONTNEED); #endif return err; } diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index 745a41fd..e1e7de5e 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -149,11 +149,6 @@ int _mi_prim_reset(void* addr, size_t size) { return 0; } -int _mi_prim_reuse(void* addr, size_t size) { - MI_UNUSED(addr); MI_UNUSED(size); - return 0; -} - int _mi_prim_protect(void* addr, size_t size, bool protect) { MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); return 0; diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index eebdc4a6..a080f4bc 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -12,10 +12,6 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc/prim.h" #include // fputs, stderr -// xbox has no console IO -#if !defined(WINAPI_FAMILY_PARTITION) || WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_SYSTEM) -#define MI_HAS_CONSOLE_IO -#endif //--------------------------------------------- // Dynamically bind Windows API points for portability @@ -49,30 +45,22 @@ typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { #define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 #include -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); -typedef LONG (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); // avoid NTSTATUS as it is not defined on xbox (pr #1084) +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; -// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7 (and GetNumaNodeProcessorMask is not supported on xbox) +// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7 typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); -typedef BOOL (__stdcall* PGetNumaNodeProcessorMask)(UCHAR Node, PULONGLONG ProcessorMask); -typedef BOOL (__stdcall* PGetNumaHighestNodeNumber)(PULONG Node); static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; -static PGetNumaNodeProcessorMask pGetNumaNodeProcessorMask = NULL; -static PGetNumaHighestNodeNumber pGetNumaHighestNodeNumber = NULL; - -// Not available on xbox -typedef SIZE_T(__stdcall* PGetLargePageMinimum)(VOID); -static PGetLargePageMinimum pGetLargePageMinimum = NULL; // Available after Windows XP typedef BOOL (__stdcall *PGetPhysicallyInstalledSystemMemory)( PULONGLONG TotalMemoryInKilobytes ); @@ -86,7 +74,6 @@ static bool win_enable_large_os_pages(size_t* large_page_size) static bool large_initialized = false; if (large_initialized) return (_mi_os_large_page_size() > 0); large_initialized = true; - if (pGetLargePageMinimum==NULL) return false; // no large page support (xbox etc.) 
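
On the `-` side above, `GetLargePageMinimum` (like `VirtualAlloc2` and the NUMA entry points) is resolved at runtime through `GetProcAddress`, so one binary still loads where an export is missing — the xbox case called out in the removed comments; the `+` side goes back to calling it directly. The binding pattern in isolation, as a hedged standalone sketch:

    #include <windows.h>
    #include <stdio.h>

    // Resolve an optional kernel32 export at runtime and fall back gracefully.
    typedef SIZE_T (__stdcall *PGetLargePageMinimum_t)(VOID);

    int main(void) {
      HMODULE hDll = GetModuleHandleA("kernel32.dll");
      PGetLargePageMinimum_t pGetLargePageMinimum = (hDll == NULL ? NULL :
        (PGetLargePageMinimum_t)(void (*)(void))GetProcAddress(hDll, "GetLargePageMinimum"));
      if (pGetLargePageMinimum != NULL) {
        printf("large page minimum: %zu bytes\n", (size_t)pGetLargePageMinimum());
      }
      else {
        printf("GetLargePageMinimum not available; assuming no large page support\n");
      }
      return 0;
    }

The double cast through `void (*)(void)` mirrors the file's own idiom for suppressing function-pointer conversion warnings on `GetProcAddress`'s `FARPROC` result.
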
// Try to see if large OS pages are supported // To use large pages on Windows, we first need access permission @@ -105,8 +92,8 @@ static bool win_enable_large_os_pages(size_t* large_page_size) if (ok) { err = GetLastError(); ok = (err == ERROR_SUCCESS); - if (ok && large_page_size != NULL && pGetLargePageMinimum != NULL) { - *large_page_size = (*pGetLargePageMinimum)(); + if (ok && large_page_size != NULL) { + *large_page_size = GetLargePageMinimum(); } } } @@ -162,9 +149,6 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); - pGetNumaNodeProcessorMask = (PGetNumaNodeProcessorMask)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMask"); - pGetNumaHighestNodeNumber = (PGetNumaHighestNodeNumber)(void (*)(void))GetProcAddress(hDll, "GetNumaHighestNodeNumber"); - pGetLargePageMinimum = (PGetLargePageMinimum)(void (*)(void))GetProcAddress(hDll, "GetLargePageMinimum"); // Get physical memory (not available on XP, so check dynamically) PGetPhysicallyInstalledSystemMemory pGetPhysicallyInstalledSystemMemory = (PGetPhysicallyInstalledSystemMemory)(void (*)(void))GetProcAddress(hDll,"GetPhysicallyInstalledSystemMemory"); if (pGetPhysicallyInstalledSystemMemory != NULL) { @@ -368,11 +352,6 @@ int _mi_prim_reset(void* addr, size_t size) { return (p != NULL ? 0 : (int)GetLastError()); } -int _mi_prim_reuse(void* addr, size_t size) { - MI_UNUSED(addr); MI_UNUSED(size); - return 0; -} - int _mi_prim_protect(void* addr, size_t size, bool protect) { DWORD oldprotect = 0; BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); @@ -404,7 +383,7 @@ static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int num } SIZE_T psize = size; void* base = hint_addr; - LONG err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); if (err == 0 && base != NULL) { return base; } @@ -458,11 +437,9 @@ size_t _mi_prim_numa_node(void) { size_t _mi_prim_numa_node_count(void) { ULONG numa_max = 0; - if (pGetNumaHighestNodeNumber!=NULL) { - (*pGetNumaHighestNodeNumber)(&numa_max); - } + GetNumaHighestNodeNumber(&numa_max); // find the highest node number that has actual processors assigned to it. Issue #282 - while (numa_max > 0) { + while(numa_max > 0) { if (pGetNumaNodeProcessorMaskEx != NULL) { // Extended API is supported GROUP_AFFINITY affinity; @@ -473,10 +450,8 @@ size_t _mi_prim_numa_node_count(void) { else { // Vista or earlier, use older API that is limited to 64 processors. 
ULONGLONG mask; - if (pGetNumaNodeProcessorMask != NULL) { - if ((*pGetNumaNodeProcessorMask)((UCHAR)numa_max, &mask)) { - if (mask != 0) break; // found the maximum non-empty node - } + if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { + if (mask != 0) break; // found the maximum non-empty node }; } // max node was invalid or had no processor assigned, try again @@ -566,21 +541,17 @@ void _mi_prim_out_stderr( const char* msg ) if (!_mi_preloading()) { // _cputs(msg); // _cputs cannot be used as it aborts when failing to lock the console static HANDLE hcon = INVALID_HANDLE_VALUE; - static bool hconIsConsole = false; + static bool hconIsConsole; if (hcon == INVALID_HANDLE_VALUE) { - hcon = GetStdHandle(STD_ERROR_HANDLE); - #ifdef MI_HAS_CONSOLE_IO CONSOLE_SCREEN_BUFFER_INFO sbi; + hcon = GetStdHandle(STD_ERROR_HANDLE); hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); - #endif } const size_t len = _mi_strlen(msg); if (len > 0 && len < UINT32_MAX) { DWORD written = 0; if (hconIsConsole) { - #ifdef MI_HAS_CONSOLE_IO WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); - #endif } else if (hcon != INVALID_HANDLE_VALUE) { // use direct write if stderr was redirected @@ -656,47 +627,19 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { // Process & Thread Init/Done //---------------------------------------------------------------- -#if MI_WIN_USE_FIXED_TLS==1 -mi_decl_cache_align size_t _mi_win_tls_offset = 0; -#endif - -//static void mi_debug_out(const char* s) { -// HANDLE h = GetStdHandle(STD_ERROR_HANDLE); -// WriteConsole(h, s, (DWORD)_mi_strlen(s), NULL, NULL); -//} - -static void mi_win_tls_init(DWORD reason) { - if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { - #if MI_WIN_USE_FIXED_TLS==1 // we must allocate a TLS slot dynamically - if (_mi_win_tls_offset == 0 && reason == DLL_PROCESS_ATTACH) { - const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 - if (tls_slot == TLS_OUT_OF_INDEXES) { - _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); - } - _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); - } - #endif - #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation - if (mi_prim_get_default_heap() == NULL) { - _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); - #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 - void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); - mi_assert_internal(p == (void*)&_mi_heap_empty); - #endif - } - #endif - } -} - static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(module); - mi_win_tls_init(reason); + #if MI_TLS_SLOT >= 2 + if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + } + #endif if (reason==DLL_PROCESS_ATTACH) { - _mi_auto_process_init(); + _mi_process_load(); } else if (reason==DLL_PROCESS_DETACH) { - _mi_auto_process_done(); + _mi_process_done(); } else if (reason==DLL_THREAD_DETACH && !_mi_is_redirected()) { _mi_thread_done(NULL); @@ -786,7 +729,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { static int mi_process_attach(void) { mi_win_main(NULL,DLL_PROCESS_ATTACH,NULL); - atexit(&_mi_auto_process_done); + atexit(&_mi_process_done); return 0; } typedef int(*mi_crt_callback_t)(void); @@ -853,7 +796,11 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID 
diff --git a/src/random.c b/src/random.c
index f17698ba..4fc8b2f8 100644
--- a/src/random.c
+++ b/src/random.c
@@ -143,17 +143,13 @@ void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) {

 uintptr_t _mi_random_next(mi_random_ctx_t* ctx) {
   mi_assert_internal(mi_random_is_initialized(ctx));
-  uintptr_t r;
-  do {
-    #if MI_INTPTR_SIZE <= 4
-    r = chacha_next32(ctx);
-    #elif MI_INTPTR_SIZE == 8
-    r = (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx));
-    #else
-    # error "define mi_random_next for this platform"
-    #endif
-  } while (r==0);
-  return r;
+  #if MI_INTPTR_SIZE <= 4
+  return chacha_next32(ctx);
+  #elif MI_INTPTR_SIZE == 8
+  return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx));
+  #else
+  # error "define mi_random_next for this platform"
+  #endif
 }

@@ -167,7 +163,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed) {
   x ^= _mi_prim_clock_now();
   // and do a few randomization steps
   uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
-  for (uintptr_t i = 0; i < max || x==0; i++, x++) {
+  for (uintptr_t i = 0; i < max; i++) {
     x = _mi_random_shuffle(x);
   }
   mi_assert_internal(x != 0);
@@ -183,7 +179,7 @@ static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) {
   if (!use_weak) { _mi_warning_message("unable to use secure randomness\n"); }
   #endif
   uintptr_t x = _mi_os_random_weak(0);
-  for (size_t i = 0; i < 8; i++, x++) {  // key is eight 32-bit words.
+  for (size_t i = 0; i < 8; i++) {  // key is eight 32-bit words.
     x = _mi_random_shuffle(x);
     ((uint32_t*)key)[i] = (uint32_t)x;
   }
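Note the behavioral change in _mi_random_next: the retry loop is gone, so a zero result can now be returned. The sketch below is illustrative only (next32 is a hypothetical stand-in for chacha_next32); it shows how the 64-bit branch composes one word from two 32-bit draws, sequenced explicitly to avoid unspecified evaluation order:

#include <stdint.h>
#include <stdio.h>

// hypothetical stand-in for chacha_next32: any 32-bit generator works here
static uint32_t next32(void) {
  static uint32_t s = 0x9E3779B9u;
  s ^= s << 13; s ^= s >> 17; s ^= s << 5;  // xorshift32
  return s;
}

// the MI_INTPTR_SIZE==8 branch above: two draws make one 64-bit word;
// a zero result is returned as-is (the old code looped until r != 0)
static uint64_t random_next64(void) {
  uint64_t hi = next32();   // high word drawn first
  uint32_t lo = next32();   // then the low word
  return (hi << 32) | lo;
}

int main(void) {
  printf("%016llx\n", (unsigned long long)random_next64());
  return 0;
}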
diff --git a/src/segment-map.c b/src/segment-map.c
index bbcea28a..2f68f8c4 100644
--- a/src/segment-map.c
+++ b/src/segment-map.c
@@ -61,7 +61,7 @@ static mi_segmap_part_t* mi_segment_map_index_of(const mi_segment_t* segment, bool create_on_demand, size_t* idx, size_t* bitidx) {
   if mi_unlikely(part == NULL) {
     if (!create_on_demand) return NULL;
     mi_memid_t memid;
-    part = (mi_segmap_part_t*)_mi_os_zalloc(sizeof(mi_segmap_part_t), &memid);
+    part = (mi_segmap_part_t*)_mi_os_alloc(sizeof(mi_segmap_part_t), &memid);
     if (part == NULL) return NULL;
     part->memid = memid;
     mi_segmap_part_t* expected = NULL;
diff --git a/src/segment.c b/src/segment.c
index 32841e6d..e2730b7f 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -11,153 +11,20 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <string.h>  // memset
 #include <stdio.h>

-// -------------------------------------------------------------------
-// Segments
-// mimalloc pages reside in segments. See `mi_segment_valid` for invariants.
-// -------------------------------------------------------------------
-
-
-static void mi_segment_try_purge(mi_segment_t* segment, bool force);
-
-
-// -------------------------------------------------------------------
-// commit mask
-// -------------------------------------------------------------------
-
-static bool mi_commit_mask_all_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) {
-  for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
-    if ((commit->mask[i] & cm->mask[i]) != cm->mask[i]) return false;
-  }
-  return true;
-}
-
-static bool mi_commit_mask_any_set(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm) {
-  for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
-    if ((commit->mask[i] & cm->mask[i]) != 0) return true;
-  }
-  return false;
-}
-
-static void mi_commit_mask_create_intersect(const mi_commit_mask_t* commit, const mi_commit_mask_t* cm, mi_commit_mask_t* res) {
-  for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
-    res->mask[i] = (commit->mask[i] & cm->mask[i]);
-  }
-}
-
-static void mi_commit_mask_clear(mi_commit_mask_t* res, const mi_commit_mask_t* cm) {
-  for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
-    res->mask[i] &= ~(cm->mask[i]);
-  }
-}
-
-static void mi_commit_mask_set(mi_commit_mask_t* res, const mi_commit_mask_t* cm) {
-  for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
-    res->mask[i] |= cm->mask[i];
-  }
-}
-
-static void mi_commit_mask_create(size_t bitidx, size_t bitcount, mi_commit_mask_t* cm) {
-  mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS);
-  mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS);
-  if (bitcount == MI_COMMIT_MASK_BITS) {
-    mi_assert_internal(bitidx==0);
-    mi_commit_mask_create_full(cm);
-  }
-  else if (bitcount == 0) {
-    mi_commit_mask_create_empty(cm);
-  }
-  else {
-    mi_commit_mask_create_empty(cm);
-    size_t i = bitidx / MI_COMMIT_MASK_FIELD_BITS;
-    size_t ofs = bitidx % MI_COMMIT_MASK_FIELD_BITS;
-    while (bitcount > 0) {
-      mi_assert_internal(i < MI_COMMIT_MASK_FIELD_COUNT);
-      size_t avail = MI_COMMIT_MASK_FIELD_BITS - ofs;
-      size_t count = (bitcount > avail ? avail : bitcount);
-      size_t mask = (count >= MI_COMMIT_MASK_FIELD_BITS ? ~((size_t)0) : (((size_t)1 << count) - 1) << ofs);
-      cm->mask[i] = mask;
-      bitcount -= count;
-      ofs = 0;
-      i++;
-    }
-  }
-}
-
-size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total) {
-  mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0);
-  size_t count = 0;
-  for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
-    size_t mask = cm->mask[i];
-    if (~mask == 0) {
-      count += MI_COMMIT_MASK_FIELD_BITS;
-    }
-    else {
-      for (; mask != 0; mask >>= 1) {  // todo: use popcount
-        if ((mask&1)!=0) count++;
-      }
-    }
-  }
-  // we use total since for huge segments each commit bit may represent a larger size
-  return ((total / MI_COMMIT_MASK_BITS) * count);
-}
-
-
-size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx) {
-  size_t i = (*idx) / MI_COMMIT_MASK_FIELD_BITS;
-  size_t ofs = (*idx) % MI_COMMIT_MASK_FIELD_BITS;
-  size_t mask = 0;
-  // find first ones
-  while (i < MI_COMMIT_MASK_FIELD_COUNT) {
-    mask = cm->mask[i];
-    mask >>= ofs;
-    if (mask != 0) {
-      while ((mask&1) == 0) {
-        mask >>= 1;
-        ofs++;
-      }
-      break;
-    }
-    i++;
-    ofs = 0;
-  }
-  if (i >= MI_COMMIT_MASK_FIELD_COUNT) {
-    // not found
-    *idx = MI_COMMIT_MASK_BITS;
-    return 0;
-  }
-  else {
-    // found, count ones
-    size_t count = 0;
-    *idx = (i*MI_COMMIT_MASK_FIELD_BITS) + ofs;
-    do {
-      mi_assert_internal(ofs < MI_COMMIT_MASK_FIELD_BITS && (mask&1) == 1);
-      do {
-        count++;
-        mask >>= 1;
-      } while ((mask&1) == 1);
-      if ((((*idx + count) % MI_COMMIT_MASK_FIELD_BITS) == 0)) {
-        i++;
-        if (i >= MI_COMMIT_MASK_FIELD_COUNT) break;
-        mask = cm->mask[i];
-        ofs = 0;
-      }
-    } while ((mask&1) == 1);
-    mi_assert_internal(count > 0);
-    return count;
-  }
-}
+#define MI_PAGE_HUGE_ALIGN   (256*1024)
+
+static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size);

 /* --------------------------------------------------------------------------------
   Segment allocation
-  We allocate pages inside bigger "segments" (32 MiB on 64-bit). This is to avoid
+  We allocate pages inside bigger "segments" (4MiB on 64-bit). This is to avoid
   splitting VMA's on Linux and reduce fragmentation on other OS's.
   Each thread owns its own segments.

   Currently we have:
-  - small pages (64KiB)
-  - medium pages (512KiB)
-  - large pages (4MiB),
+  - small pages (64KiB), 64 in one segment
+  - medium pages (512KiB), 8 in one segment
+  - large pages (4MiB), 1 in one segment
   - huge segments have 1 page in one segment that can be larger than `MI_SEGMENT_SIZE`.
     it is used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or with alignment `> MI_BLOCK_ALIGNMENT_MAX`.
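The page counts in the restored comment follow directly from the sizes; a quick self-check under the stated v1 constants (4 MiB segments), not code from the patch:

#include <assert.h>

#define SEGMENT_SIZE (4u << 20)   // 4 MiB, per the comment above

int main(void) {
  assert(SEGMENT_SIZE / (64u << 10)  == 64);  // 64 small pages per segment
  assert(SEGMENT_SIZE / (512u << 10) == 8);   // 8 medium pages per segment
  assert(SEGMENT_SIZE / (4u << 20)   == 1);   // 1 large page per segment
  return 0;
}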
@@ -171,84 +38,74 @@ size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx) {

 /* -----------------------------------------------------------
-  Slices
+  Queue of segments containing free pages
 ----------------------------------------------------------- */

+#if (MI_DEBUG>=3)
+static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, const mi_segment_t* segment) {
+  mi_assert_internal(segment != NULL);
+  mi_segment_t* list = queue->first;
+  while (list != NULL) {
+    if (list == segment) break;
+    mi_assert_internal(list->next==NULL || list->next->prev == list);
+    mi_assert_internal(list->prev==NULL || list->prev->next == list);
+    list = list->next;
+  }
+  return (list == segment);
+}
+#endif

-static const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment) {
-  return &segment->slices[segment->slice_entries];
+/*
+static bool mi_segment_queue_is_empty(const mi_segment_queue_t* queue) {
+  return (queue->first == NULL);
+}
+*/
+
+static void mi_segment_queue_remove(mi_segment_queue_t* queue, mi_segment_t* segment) {
+  mi_assert_expensive(mi_segment_queue_contains(queue, segment));
+  if (segment->prev != NULL) segment->prev->next = segment->next;
+  if (segment->next != NULL) segment->next->prev = segment->prev;
+  if (segment == queue->first) queue->first = segment->next;
+  if (segment == queue->last)  queue->last = segment->prev;
+  segment->next = NULL;
+  segment->prev = NULL;
 }

-static uint8_t* mi_slice_start(const mi_slice_t* slice) {
-  mi_segment_t* segment = _mi_ptr_segment(slice);
-  mi_assert_internal(slice >= segment->slices && slice < mi_segment_slices_end(segment));
-  return ((uint8_t*)segment + ((slice - segment->slices)*MI_SEGMENT_SLICE_SIZE));
+static void mi_segment_enqueue(mi_segment_queue_t* queue, mi_segment_t* segment) {
+  mi_assert_expensive(!mi_segment_queue_contains(queue, segment));
+  segment->next = NULL;
+  segment->prev = queue->last;
+  if (queue->last != NULL) {
+    mi_assert_internal(queue->last->next == NULL);
+    queue->last->next = segment;
+    queue->last = segment;
+  }
+  else {
+    queue->last = queue->first = segment;
+  }
 }

-
-/* -----------------------------------------------------------
-  Bins
------------------------------------------------------------ */
-// Use bit scan forward to quickly find the first zero bit if it is available
-
-static inline size_t mi_slice_bin8(size_t slice_count) {
-  if (slice_count<=1) return slice_count;
-  mi_assert_internal(slice_count <= MI_SLICES_PER_SEGMENT);
-  slice_count--;
-  size_t s = mi_bsr(slice_count);  // slice_count > 1
-  if (s <= 2) return slice_count + 1;
-  size_t bin = ((s << 2) | ((slice_count >> (s - 2))&0x03)) - 4;
-  return bin;
+static mi_segment_queue_t* mi_segment_free_queue_of_kind(mi_page_kind_t kind, mi_segments_tld_t* tld) {
+  if (kind == MI_PAGE_SMALL) return &tld->small_free;
+  else if (kind == MI_PAGE_MEDIUM) return &tld->medium_free;
+  else return NULL;
 }

-static inline size_t mi_slice_bin(size_t slice_count) {
-  mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_SEGMENT_SIZE);
-  mi_assert_internal(mi_slice_bin8(MI_SLICES_PER_SEGMENT) <= MI_SEGMENT_BIN_MAX);
-  size_t bin = mi_slice_bin8(slice_count);
-  mi_assert_internal(bin <= MI_SEGMENT_BIN_MAX);
-  return bin;
+static mi_segment_queue_t* mi_segment_free_queue(const mi_segment_t* segment, mi_segments_tld_t* tld) {
+  return mi_segment_free_queue_of_kind(segment->page_kind, tld);
 }

-static inline size_t mi_slice_index(const mi_slice_t* slice) {
-  mi_segment_t* segment = _mi_ptr_segment(slice);
-  ptrdiff_t index = slice - segment->slices;
-  mi_assert_internal(index >= 0 && index < (ptrdiff_t)segment->slice_entries);
-  return index;
+// remove from free queue if it is in one
+static void mi_segment_remove_from_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) {
+  mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); // may be NULL
+  bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment));
+  if (in_queue) {
+    mi_segment_queue_remove(queue, segment);
+  }
 }

-
-/* -----------------------------------------------------------
-  Slice span queues
------------------------------------------------------------ */
-
-static void mi_span_queue_push(mi_span_queue_t* sq, mi_slice_t* slice) {
-  // todo: or push to the end?
-  mi_assert_internal(slice->prev == NULL && slice->next==NULL);
-  slice->prev = NULL; // paranoia
-  slice->next = sq->first;
-  sq->first = slice;
-  if (slice->next != NULL) slice->next->prev = slice;
-  else sq->last = slice;
-  slice->block_size = 0; // free
-}
-
-static mi_span_queue_t* mi_span_queue_for(size_t slice_count, mi_segments_tld_t* tld) {
-  size_t bin = mi_slice_bin(slice_count);
-  mi_span_queue_t* sq = &tld->spans[bin];
-  mi_assert_internal(sq->slice_count >= slice_count);
-  return sq;
-}
-
-static void mi_span_queue_delete(mi_span_queue_t* sq, mi_slice_t* slice) {
-  mi_assert_internal(slice->block_size==0 && slice->slice_count>0 && slice->slice_offset==0);
-  // should work too if the queue does not contain slice (which can happen during reclaim)
-  if (slice->prev != NULL) slice->prev->next = slice->next;
-  if (slice == sq->first) sq->first = slice->next;
-  if (slice->next != NULL) slice->next->prev = slice->prev;
-  if (slice == sq->last) sq->last = slice->prev;
-  slice->prev = NULL;
-  slice->next = NULL;
-  slice->block_size = 1; // no more free
+static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) {
+  mi_segment_enqueue(mi_segment_free_queue(segment, tld), segment);
 }

@@ -256,136 +113,366 @@ static void mi_span_queue_delete(mi_span_queue_t* sq, mi_slice_t* slice) {
   Invariant checking
 ----------------------------------------------------------- */

-static bool mi_slice_is_used(const mi_slice_t* slice) {
-  return (slice->block_size > 0);
+#if (MI_DEBUG >= 2) || (MI_SECURE >= 2)
+static size_t mi_segment_page_size(const mi_segment_t* segment) {
+  if (segment->capacity > 1) {
+    mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM);
+    return ((size_t)1 << segment->page_shift);
+  }
+  else {
+    mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE);
+    return segment->segment_size;
+  }
 }
+#endif

-
-#if (MI_DEBUG>=3)
-static bool mi_span_queue_contains(mi_span_queue_t* sq, mi_slice_t* slice) {
-  for (mi_slice_t* s = sq->first; s != NULL; s = s->next) {
-    if (s==slice) return true;
+#if (MI_DEBUG>=2)
+static bool mi_pages_purge_contains(const mi_page_t* page, mi_segments_tld_t* tld) {
+  mi_page_t* p = tld->pages_purge.first;
+  while (p != NULL) {
+    if (p == page) return true;
+    p = p->next;
   }
   return false;
 }
+#endif

-static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) {
+#if (MI_DEBUG>=3)
+static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(segment != NULL);
   mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
+  mi_assert_internal(segment->used <= segment->capacity);
   mi_assert_internal(segment->abandoned <= segment->used);
-  mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id());
-  mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // can only decommit committed blocks
-  //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0);
-  mi_slice_t* slice = &segment->slices[0];
-  const mi_slice_t* end = mi_segment_slices_end(segment);
-  size_t used_count = 0;
-  mi_span_queue_t* sq;
-  while(slice < end) {
-    mi_assert_internal(slice->slice_count > 0);
-    mi_assert_internal(slice->slice_offset == 0);
-    size_t index = mi_slice_index(slice);
-    size_t maxindex = (index + slice->slice_count >= segment->slice_entries ? segment->slice_entries : index + slice->slice_count) - 1;
-    if (mi_slice_is_used(slice)) { // a page in use, we need at least MAX_SLICE_OFFSET_COUNT valid back offsets
-      used_count++;
-      mi_assert_internal(slice->is_huge == (segment->kind == MI_SEGMENT_HUGE));
-      for (size_t i = 0; i <= MI_MAX_SLICE_OFFSET_COUNT && index + i <= maxindex; i++) {
-        mi_assert_internal(segment->slices[index + i].slice_offset == i*sizeof(mi_slice_t));
-        mi_assert_internal(i==0 || segment->slices[index + i].slice_count == 0);
-        mi_assert_internal(i==0 || segment->slices[index + i].block_size == 1);
-      }
-      // and the last entry as well (for coalescing)
-      const mi_slice_t* last = slice + slice->slice_count - 1;
-      if (last > slice && last < mi_segment_slices_end(segment)) {
-        mi_assert_internal(last->slice_offset == (slice->slice_count-1)*sizeof(mi_slice_t));
-        mi_assert_internal(last->slice_count == 0);
-        mi_assert_internal(last->block_size == 1);
-      }
+  mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || segment->capacity == 1);  // one large or huge page per segment
+  size_t nfree = 0;
+  for (size_t i = 0; i < segment->capacity; i++) {
+    const mi_page_t* const page = &segment->pages[i];
+    if (!page->segment_in_use) {
+      nfree++;
     }
-    else { // free range of slices; only last slice needs a valid back offset
-      mi_slice_t* last = &segment->slices[maxindex];
-      if (segment->kind != MI_SEGMENT_HUGE || slice->slice_count <= (segment->slice_entries - segment->segment_info_slices)) {
-        mi_assert_internal((uint8_t*)slice == (uint8_t*)last - last->slice_offset);
-      }
-      mi_assert_internal(slice == last || last->slice_count == 0 );
-      mi_assert_internal(last->block_size == 0 || (segment->kind==MI_SEGMENT_HUGE && last->block_size==1));
-      if (segment->kind != MI_SEGMENT_HUGE && segment->thread_id != 0) { // segment is not huge or abandoned
-        sq = mi_span_queue_for(slice->slice_count,tld);
-        mi_assert_internal(mi_span_queue_contains(sq,slice));
-      }
+    if (page->segment_in_use) {
+      mi_assert_expensive(!mi_pages_purge_contains(page, tld));
     }
-    slice = &segment->slices[maxindex+1];
+    mi_assert_internal(page->is_huge == (segment->page_kind == MI_PAGE_HUGE));
   }
-  mi_assert_internal(slice == end);
-  mi_assert_internal(used_count == segment->used + 1);
+  mi_assert_internal(nfree + segment->used == segment->capacity);
+  // mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0
+  mi_assert_internal(segment->page_kind == MI_PAGE_HUGE ||
+                     (mi_segment_page_size(segment) * segment->capacity == segment->segment_size));
   return true;
 }
 #endif

+static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) {
+  mi_assert_internal(page != NULL);
+  if (page->next != NULL || page->prev != NULL) {
+    mi_assert_internal(mi_pages_purge_contains(page, tld));
+    return false;
+  }
+  else {
+    // both next and prev are NULL, check for singleton list
+    return (tld->pages_purge.first != page && tld->pages_purge.last != page);
+  }
+}


/* -----------------------------------------------------------
+ Guard pages
+----------------------------------------------------------- */
+
+static void mi_segment_protect_range(void* p, size_t size, bool protect) {
+  if (protect) {
+    _mi_os_protect(p, size);
+  }
+  else {
+    _mi_os_unprotect(p, size);
+  }
+}
+
+static void mi_segment_protect(mi_segment_t* segment, bool protect) {
+  // add/remove guard pages
+  if (MI_SECURE != 0) {
+    // in secure mode, we set up a protected page in between the segment info and the page data
+    const size_t os_psize = _mi_os_page_size();
+    mi_assert_internal((segment->segment_info_size - os_psize) >= (sizeof(mi_segment_t) + ((segment->capacity - 1) * sizeof(mi_page_t))));
+    mi_assert_internal(((uintptr_t)segment + segment->segment_info_size) % os_psize == 0);
+    mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_psize, os_psize, protect);
+    #if (MI_SECURE >= 2)
+    if (segment->capacity == 1)
+    #endif
+    {
+      // and protect the last (or only) page too
+      mi_assert_internal(MI_SECURE <= 1 || segment->page_kind >= MI_PAGE_LARGE);
+      uint8_t* start = (uint8_t*)segment + segment->segment_size - os_psize;
+      if (protect && !segment->memid.initially_committed) {
+        if (protect) {
+          // ensure secure page is committed
+          if (_mi_os_commit(start, os_psize, NULL)) {  // if this fails that is ok (as it is an unaccessible page)
+            mi_segment_protect_range(start, os_psize, protect);
+          }
+        }
+      }
+      else {
+        mi_segment_protect_range(start, os_psize, protect);
+      }
+    }
+    #if (MI_SECURE >= 2)
+    else {
+      // or protect every page
+      const size_t page_size = mi_segment_page_size(segment);
+      for (size_t i = 0; i < segment->capacity; i++) {
+        if (segment->pages[i].is_committed) {
+          mi_segment_protect_range((uint8_t*)segment + (i+1)*page_size - os_psize, os_psize, protect);
+        }
+      }
+    }
+    #endif
+  }
+}
+
+/* -----------------------------------------------------------
+ Page reset
+----------------------------------------------------------- */
+
+static void mi_page_purge(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) {
+  // todo: should we purge the guard page as well when MI_SECURE>=2 ?
+  mi_assert_internal(page->is_committed);
+  mi_assert_internal(!page->segment_in_use);
+  if (!segment->allow_purge) return;
+  mi_assert_internal(page->used == 0);
+  mi_assert_internal(page->free == NULL);
+  mi_assert_expensive(!mi_pages_purge_contains(page, tld)); MI_UNUSED(tld);
+  size_t psize;
+  void* start = mi_segment_raw_page_start(segment, page, &psize);
+  const bool needs_recommit = _mi_os_purge(start, psize);
+  if (needs_recommit) { page->is_committed = false; }
+}
+
+static bool mi_page_ensure_committed(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) {
+  if (page->is_committed) return true;
+  mi_assert_internal(segment->allow_decommit);
+  mi_assert_expensive(!mi_pages_purge_contains(page, tld)); MI_UNUSED(tld);
+
+  size_t psize;
+  uint8_t* start = mi_segment_raw_page_start(segment, page, &psize);
+  bool is_zero = false;
+  const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0);
+  bool ok = _mi_os_commit(start, psize + gsize, &is_zero);
+  if (!ok) return false; // failed to commit!
+  page->is_committed = true;
+  page->used = 0;
+  page->free = NULL;
+  page->is_zero_init = is_zero;
+  if (gsize > 0) {
+    mi_segment_protect_range(start + psize, gsize, true);
+  }
+  return true;
+}
+
+
+/* -----------------------------------------------------------
+  The free page queue
+----------------------------------------------------------- */
+
+// we re-use the `free` field for the expiration counter. Since this is a
+// pointer size field while the clock is always 64-bit we need to guard
+// against overflow; we use subtraction to check for expiry which works
+// as long as the reset delay is under (2^30 - 1) milliseconds (~12 days)
+static uint32_t mi_page_get_expire( mi_page_t* page ) {
+  return (uint32_t)((uintptr_t)page->free);
+}
+
+static void mi_page_set_expire( mi_page_t* page, uint32_t expire ) {
+  page->free = (mi_block_t*)((uintptr_t)expire);
+}
+
+static void mi_page_purge_set_expire(mi_page_t* page) {
+  mi_assert_internal(mi_page_get_expire(page)==0);
+  uint32_t expire = (uint32_t)_mi_clock_now() + mi_option_get(mi_option_purge_delay);
+  mi_page_set_expire(page, expire);
+}
+
+// we re-use the `free` field for the expiration counter. Since this is a
+// pointer size field while the clock is always 64-bit we need to guard
+// against overflow; we use subtraction to check for expiry which works
+// as long as the reset delay is under (2^30 - 1) milliseconds (~12 days)
+static bool mi_page_purge_is_expired(mi_page_t* page, mi_msecs_t now) {
+  int32_t expire = (int32_t)mi_page_get_expire(page);
+  return (((int32_t)now - expire) >= 0);
+}
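The expiry comment above is worth unpacking: the 64-bit clock is truncated to 32 bits and compared by signed subtraction, which stays correct across unsigned wraparound for delays below 2^31 ms. A small standalone demo with illustrative values (not code from the patch):

#include <stdint.h>
#include <stdio.h>

// same test as mi_page_purge_is_expired: signed difference of truncated clocks
static int is_expired(uint32_t expire, uint64_t now64) {
  return ((int32_t)((uint32_t)now64 - expire) >= 0);
}

int main(void) {
  uint64_t now = 0xFFFFFFF0ull;             // clock close to the 32-bit wrap
  uint32_t deadline = (uint32_t)now + 100;  // deadline wraps past zero
  printf("%d\n", is_expired(deadline, now));        // 0: not yet expired
  printf("%d\n", is_expired(deadline, now + 200));  // 1: expired, despite wrap
  return 0;
}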
+static void mi_segment_schedule_purge(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) {
+  mi_assert_internal(!page->segment_in_use);
+  mi_assert_internal(mi_page_not_in_queue(page,tld));
+  mi_assert_expensive(!mi_pages_purge_contains(page, tld));
+  mi_assert_internal(_mi_page_segment(page)==segment);
+  if (!segment->allow_purge) return;
+
+  if (mi_option_get(mi_option_purge_delay) == 0) {
+    // purge immediately?
+    mi_page_purge(segment, page, tld);
+  }
+  else if (mi_option_get(mi_option_purge_delay) > 0) {  // no purging if the delay is negative
+    // otherwise push on the delayed page reset queue
+    mi_page_queue_t* pq = &tld->pages_purge;
+    // push on top
+    mi_page_purge_set_expire(page);
+    page->next = pq->first;
+    page->prev = NULL;
+    if (pq->first == NULL) {
+      mi_assert_internal(pq->last == NULL);
+      pq->first = pq->last = page;
+    }
+    else {
+      pq->first->prev = page;
+      pq->first = page;
+    }
+  }
+}
+
+static void mi_page_purge_remove(mi_page_t* page, mi_segments_tld_t* tld) {
+  if (mi_page_not_in_queue(page,tld)) return;
+
+  mi_page_queue_t* pq = &tld->pages_purge;
+  mi_assert_internal(pq!=NULL);
+  mi_assert_internal(!page->segment_in_use);
+  mi_assert_internal(mi_page_get_expire(page) != 0);
+  mi_assert_internal(mi_pages_purge_contains(page, tld));
+  if (page->prev != NULL) page->prev->next = page->next;
+  if (page->next != NULL) page->next->prev = page->prev;
+  if (page == pq->last)  pq->last = page->prev;
+  if (page == pq->first) pq->first = page->next;
+  page->next = page->prev = NULL;
+  mi_page_set_expire(page,0);
+}
+
+static void mi_segment_remove_all_purges(mi_segment_t* segment, bool force_purge, mi_segments_tld_t* tld) {
+  if (segment->memid.is_pinned) return; // never reset in huge OS pages
+  for (size_t i = 0; i < segment->capacity; i++) {
+    mi_page_t* page = &segment->pages[i];
+    if (!page->segment_in_use) {
+      mi_page_purge_remove(page, tld);
+      if (force_purge && page->is_committed) {
+        mi_page_purge(segment, page, tld);
+      }
+    }
+    else {
+      mi_assert_internal(mi_page_not_in_queue(page,tld));
+    }
+  }
+}
+
+static void mi_pages_try_purge(bool force, mi_segments_tld_t* tld) {
+  if (mi_option_get(mi_option_purge_delay) < 0) return;  // purging is not allowed
+
+  mi_msecs_t now = _mi_clock_now();
+  mi_page_queue_t* pq = &tld->pages_purge;
+  // from oldest up to the first that has not expired yet
+  mi_page_t* page = pq->last;
+  while (page != NULL && (force || mi_page_purge_is_expired(page,now))) {
+    mi_page_t* const prev = page->prev; // save previous field
+    mi_page_purge_remove(page, tld);    // remove from the list to maintain invariant for mi_page_purge
+    mi_page_purge(_mi_page_segment(page), page, tld);
+    page = prev;
+  }
+  // discard the reset pages from the queue
+  pq->last = page;
+  if (page != NULL){
+    page->next = NULL;
+  }
+  else {
+    pq->first = NULL;
+  }
+}
+

 /* -----------------------------------------------------------
  Segment size calculations
 ----------------------------------------------------------- */

-static size_t mi_segment_info_size(mi_segment_t* segment) {
-  return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE;
+static size_t mi_segment_raw_page_size(const mi_segment_t* segment) {
+  return (segment->page_kind == MI_PAGE_HUGE ? segment->segment_size : (size_t)1 << segment->page_shift);
 }

-static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t block_size, size_t* page_size)
-{
-  const ptrdiff_t idx = slice - segment->slices;
-  const size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE;
-  uint8_t* const pstart = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE);
-  // make the start not OS page aligned for smaller blocks to avoid page/cache effects
-  // note: the offset must always be a block_size multiple since we assume small allocations
-  // are aligned (see `mi_heap_malloc_aligned`).
-  size_t start_offset = 0;
-  if (block_size > 0 && block_size <= MI_MAX_ALIGN_GUARANTEE) {
-    // for small objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore)
-    const size_t adjust = block_size - ((uintptr_t)pstart % block_size);
-    if (adjust < block_size && psize >= block_size + adjust) {
-      start_offset += adjust;
-    }
+// Raw start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set)
+// The raw start is not taking aligned block allocation into consideration.
+static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
+  size_t psize = mi_segment_raw_page_size(segment);
+  uint8_t* p = (uint8_t*)segment + page->segment_idx * psize;
+
+  if (page->segment_idx == 0) {
+    // the first page starts after the segment info (and possible guard page)
+    p += segment->segment_info_size;
+    psize -= segment->segment_info_size;
   }
-  if (block_size >= MI_INTPTR_SIZE) {
-    if (block_size <= 64) { start_offset += 3*block_size; }
-    else if (block_size <= 512) { start_offset += block_size; }
+
+#if (MI_SECURE > 1)  // every page has an os guard page
+  psize -= _mi_os_page_size();
+#elif (MI_SECURE==1) // the last page has an os guard page at the end
+  if (page->segment_idx == segment->capacity - 1) {
+    psize -= _mi_os_page_size();
   }
-  start_offset = _mi_align_up(start_offset, MI_MAX_ALIGN_SIZE);
-  mi_assert_internal(_mi_is_aligned(pstart + start_offset, MI_MAX_ALIGN_SIZE));
-  mi_assert_internal(block_size == 0 || block_size > MI_MAX_ALIGN_GUARANTEE || _mi_is_aligned(pstart + start_offset,block_size));
-  if (page_size != NULL) { *page_size = psize - start_offset; }
-  return (pstart + start_offset);
+#endif
+
+  if (page_size != NULL) *page_size = psize;
+  mi_assert_internal(page->block_size == 0 || _mi_ptr_page(p) == page);
+  mi_assert_internal(_mi_ptr_segment(p) == segment);
+  return p;
 }

-// Start of the page available memory; can be used on uninitialized pages
+// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set)
 uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size)
 {
-  const mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page);
-  uint8_t* p = _mi_segment_page_start_from_slice(segment, slice, mi_page_block_size(page), page_size);
-  mi_assert_internal(mi_page_block_size(page) > 0 || _mi_ptr_page(p) == page);
+  size_t   psize;
+  uint8_t* p = mi_segment_raw_page_start(segment, page, &psize);
+  const size_t block_size = mi_page_block_size(page);
+  if (/*page->segment_idx == 0 &&*/ block_size > 0 && block_size <= MI_MAX_ALIGN_GUARANTEE) {
+    // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore)
+    mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM);
+    size_t adjust = block_size - ((uintptr_t)p % block_size);
+    if (adjust < block_size && psize >= block_size + adjust) {
+      p += adjust;
+      psize -= adjust;
+      mi_assert_internal((uintptr_t)p % block_size == 0);
+    }
+  }
+  mi_assert_internal(_mi_is_aligned(p, MI_MAX_ALIGN_SIZE));
+  mi_assert_internal(block_size == 0 || block_size > MI_MAX_ALIGN_GUARANTEE || _mi_is_aligned(p,block_size));
+
+  if (page_size != NULL) *page_size = psize;
+  mi_assert_internal(_mi_ptr_page(p) == page);
   mi_assert_internal(_mi_ptr_segment(p) == segment);
   return p;
 }
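A worked example of the block-size adjustment in _mi_segment_page_start (all values made up): the start pointer is bumped to the next block_size multiple and the usable size shrinks by the same amount.

#include <stdint.h>
#include <stdio.h>

int main(void) {
  uintptr_t p = 0x10010;     // raw page start (after the segment info)
  size_t psize = 0x10000;    // raw page size
  size_t block_size = 0x60;  // 96-byte blocks
  size_t adjust = block_size - (p % block_size);
  if (adjust < block_size && psize >= block_size + adjust) {
    p += adjust;             // now p % block_size == 0
    psize -= adjust;
  }
  printf("start=%#lx size=%#zx rem=%zu\n",
         (unsigned long)p, psize, (size_t)(p % block_size));
  return 0;
}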
-static size_t mi_segment_calculate_slices(size_t required, size_t* info_slices) {
-  size_t page_size = _mi_os_page_size();
-  size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size);
+static size_t mi_segment_calculate_sizes(size_t capacity, size_t required, size_t* pre_size, size_t* info_size)
+{
+  const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */;
   size_t guardsize = 0;
+  size_t isize     = 0;
-
-  if (MI_SECURE>0) {
+
+  if (MI_SECURE == 0) {
+    // normally no guard pages
+    #if MI_GUARDED
+    isize = _mi_align_up(minsize, _mi_os_page_size());
+    #else
+    isize = _mi_align_up(minsize, 16 * MI_MAX_ALIGN_SIZE);
+    #endif
+  }
+  else {
     // in secure mode, we set up a protected page in between the segment info
     // and the page data (and one at the end of the segment)
+    const size_t page_size = _mi_os_page_size();
+    isize = _mi_align_up(minsize, page_size);
     guardsize = page_size;
-    if (required > 0) {
-      required = _mi_align_up(required, MI_SEGMENT_SLICE_SIZE) + page_size;
-    }
+    //required = _mi_align_up(required, isize + guardsize);
   }
-  isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE);
-  if (info_slices != NULL) *info_slices = isize / MI_SEGMENT_SLICE_SIZE;
-  size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) );
-  mi_assert_internal(segment_size % MI_SEGMENT_SLICE_SIZE == 0);
-  return (segment_size / MI_SEGMENT_SLICE_SIZE);
+
+  if (info_size != NULL) *info_size = isize;
+  if (pre_size != NULL)  *pre_size  = isize + guardsize;
+  return (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + 2*guardsize, MI_PAGE_HUGE_ALIGN) );
 }

@@ -404,424 +491,43 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) {
   if (tld->current_size > tld->peak_size) tld->peak_size = tld->current_size;
 }

-static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
+static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) {
   segment->thread_id = 0;
   _mi_segment_map_freed_at(segment);
-  mi_segments_track_size(-((long)mi_segment_size(segment)),tld);
+  mi_segments_track_size(-((long)segment_size),tld);
   if (segment->was_reclaimed) {
     tld->reclaim_count--;
     segment->was_reclaimed = false;
   }
-  if (MI_SECURE>0) {
-    // _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set
-    // unprotect the guard pages; we cannot just unprotect the whole segment size as part may be decommitted
-    size_t os_pagesize = _mi_os_page_size();
-    _mi_os_unprotect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize);
-    uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize;
-    _mi_os_unprotect(end, os_pagesize);
+
+  if (MI_SECURE != 0) {
+    mi_assert_internal(!segment->memid.is_pinned);
+    mi_segment_protect(segment, false); // ensure no more guard pages are set
   }
-  // purge delayed decommits now? (no, leave it to the arena)
-  // mi_segment_try_purge(segment,true,tld->stats);
+  bool fully_committed = true;
+  size_t committed_size = 0;
+  const size_t page_size = mi_segment_raw_page_size(segment);
+  for (size_t i = 0; i < segment->capacity; i++) {
+    mi_page_t* page = &segment->pages[i];
+    if (page->is_committed)  { committed_size += page_size; }
+    if (!page->is_committed) { fully_committed = false; }
+  }
+  MI_UNUSED(fully_committed);
+  mi_assert_internal((fully_committed && committed_size == segment_size) || (!fully_committed && committed_size < segment_size));

-  const size_t size = mi_segment_size(segment);
-  const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size);
-
-  _mi_arena_free(segment, mi_segment_size(segment), csize, segment->memid);
+  _mi_arena_free(segment, segment_size, committed_size, segment->memid);
 }
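For reference while reading the removed code below: mi_segment_commit_mask rounded ranges conservatively (inward) for purging and liberally (outward) for committing. A sketch of the two roundings with an assumed COMMIT_SIZE, not the patch's code:

#include <stddef.h>
#include <stdio.h>

#define COMMIT_SIZE 0x10000u

static size_t align_down(size_t x, size_t a) { return x - (x % a); }
static size_t align_up(size_t x, size_t a)   { return align_down(x + a - 1, a); }

int main(void) {
  size_t lo = 0x12345, hi = 0x45678;
  // conservative (purge): shrink to the blocks fully inside [lo, hi)
  printf("purge:  [%#zx, %#zx)\n", align_up(lo, COMMIT_SIZE), align_down(hi, COMMIT_SIZE));
  // liberal (commit): grow to cover [lo, hi) completely
  printf("commit: [%#zx, %#zx)\n", align_down(lo, COMMIT_SIZE), align_up(hi, COMMIT_SIZE));
  return 0;
}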
-/* -----------------------------------------------------------
-  Commit/Decommit ranges
------------------------------------------------------------ */
-
-static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size, mi_commit_mask_t* cm) {
-  mi_assert_internal(_mi_ptr_segment(p + 1) == segment);
-  mi_assert_internal(segment->kind != MI_SEGMENT_HUGE);
-  mi_commit_mask_create_empty(cm);
-  if (size == 0 || size > MI_SEGMENT_SIZE || segment->kind == MI_SEGMENT_HUGE) return;
-  const size_t segstart = mi_segment_info_size(segment);
-  const size_t segsize = mi_segment_size(segment);
-  if (p >= (uint8_t*)segment + segsize) return;
-
-  size_t pstart = (p - (uint8_t*)segment);
-  mi_assert_internal(pstart + size <= segsize);
-
-  size_t start;
-  size_t end;
-  if (conservative) {
-    // decommit conservative
-    start = _mi_align_up(pstart, MI_COMMIT_SIZE);
-    end   = _mi_align_down(pstart + size, MI_COMMIT_SIZE);
-    mi_assert_internal(start >= segstart);
-    mi_assert_internal(end <= segsize);
+// called from `heap_collect`.
+void _mi_segments_collect(bool force, mi_segments_tld_t* tld) {
+  mi_pages_try_purge(force,tld);
+  #if MI_DEBUG>=2
+  if (!_mi_is_main_thread()) {
+    mi_assert_internal(tld->pages_purge.first == NULL);
+    mi_assert_internal(tld->pages_purge.last == NULL);
   }
-  else {
-    // commit liberal
-    start = _mi_align_down(pstart, MI_MINIMAL_COMMIT_SIZE);
-    end   = _mi_align_up(pstart + size, MI_MINIMAL_COMMIT_SIZE);
-  }
-  if (pstart >= segstart && start < segstart) {  // note: the mask is also calculated for an initial commit of the info area
-    start = segstart;
-  }
-  if (end > segsize) {
-    end = segsize;
-  }
-
-  mi_assert_internal(start <= pstart && (pstart + size) <= end);
-  mi_assert_internal(start % MI_COMMIT_SIZE==0 && end % MI_COMMIT_SIZE == 0);
-  *start_p   = (uint8_t*)segment + start;
-  *full_size = (end > start ? end - start : 0);
-  if (*full_size == 0) return;
-
-  size_t bitidx = start / MI_COMMIT_SIZE;
-  mi_assert_internal(bitidx < MI_COMMIT_MASK_BITS);
-
-  size_t bitcount = *full_size / MI_COMMIT_SIZE; // can be 0
-  if (bitidx + bitcount > MI_COMMIT_MASK_BITS) {
-    _mi_warning_message("commit mask overflow: idx=%zu count=%zu start=%zx end=%zx p=0x%p size=%zu fullsize=%zu\n", bitidx, bitcount, start, end, p, size, *full_size);
-  }
-  mi_assert_internal((bitidx + bitcount) <= MI_COMMIT_MASK_BITS);
-  mi_commit_mask_create(bitidx, bitcount, cm);
-}
-
-static bool mi_segment_commit(mi_segment_t* segment, uint8_t* p, size_t size) {
-  mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask));
-
-  // commit liberal
-  uint8_t* start = NULL;
-  size_t   full_size = 0;
-  mi_commit_mask_t mask;
-  mi_segment_commit_mask(segment, false /* conservative? */, p, size, &start, &full_size, &mask);
-  if (mi_commit_mask_is_empty(&mask) || full_size == 0) return true;
-
-  if (!mi_commit_mask_all_set(&segment->commit_mask, &mask)) {
-    // committing
-    bool is_zero = false;
-    mi_commit_mask_t cmask;
-    mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask);
-    _mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap
-    if (!_mi_os_commit(start, full_size, &is_zero)) return false;
-    mi_commit_mask_set(&segment->commit_mask, &mask);
-  }
-
-  // increase purge expiration when using part of delayed purges -- we assume more allocations are coming soon.
-  if (mi_commit_mask_any_set(&segment->purge_mask, &mask)) {
-    segment->purge_expire = _mi_clock_now() + mi_option_get(mi_option_purge_delay);
-  }
-
-  // always clear any delayed purges in our range (as they are either committed now)
-  mi_commit_mask_clear(&segment->purge_mask, &mask);
-  return true;
-}
-
-static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size) {
-  mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask));
-  // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow
-  if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->purge_mask)) return true; // fully committed
-  mi_assert_internal(segment->kind != MI_SEGMENT_HUGE);
-  return mi_segment_commit(segment, p, size);
-}
-
-static bool mi_segment_purge(mi_segment_t* segment, uint8_t* p, size_t size) {
-  mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask));
-  if (!segment->allow_purge) return true;
-
-  // purge conservative
-  uint8_t* start = NULL;
-  size_t   full_size = 0;
-  mi_commit_mask_t mask;
-  mi_segment_commit_mask(segment, true /* conservative? */, p, size, &start, &full_size, &mask);
-  if (mi_commit_mask_is_empty(&mask) || full_size==0) return true;
-
-  if (mi_commit_mask_any_set(&segment->commit_mask, &mask)) {
-    // purging
-    mi_assert_internal((void*)start != (void*)segment);
-    mi_assert_internal(segment->allow_decommit);
-    const bool decommitted = _mi_os_purge(start, full_size); // reset or decommit
-    if (decommitted) {
-      mi_commit_mask_t cmask;
-      mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask);
-      _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting
-      mi_commit_mask_clear(&segment->commit_mask, &mask);
-    }
-  }
-
-  // always clear any scheduled purges in our range
-  mi_commit_mask_clear(&segment->purge_mask, &mask);
-  return true;
-}
-
-static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t size) {
-  if (!segment->allow_purge) return;
-
-  if (mi_option_get(mi_option_purge_delay) == 0) {
-    mi_segment_purge(segment, p, size);
-  }
-  else {
-    // register for future purge in the purge mask
-    uint8_t* start = NULL;
-    size_t   full_size = 0;
-    mi_commit_mask_t mask;
-    mi_segment_commit_mask(segment, true /*conservative*/, p, size, &start, &full_size, &mask);
-    if (mi_commit_mask_is_empty(&mask) || full_size==0) return;
-
-    // update delayed commit
-    mi_assert_internal(segment->purge_expire > 0 || mi_commit_mask_is_empty(&segment->purge_mask));
-    mi_commit_mask_t cmask;
-    mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask);  // only purge what is committed; span_free may try to decommit more
-    mi_commit_mask_set(&segment->purge_mask, &cmask);
-    mi_msecs_t now = _mi_clock_now();
-    if (segment->purge_expire == 0) {
-      // no previous purgess, initialize now
-      segment->purge_expire = now + mi_option_get(mi_option_purge_delay);
-    }
-    else if (segment->purge_expire <= now) {
-      // previous purge mask already expired
-      if (segment->purge_expire + mi_option_get(mi_option_purge_extend_delay) <= now) {
-        mi_segment_try_purge(segment, true);
-      }
-      else {
-        segment->purge_expire = now + mi_option_get(mi_option_purge_extend_delay); // (mi_option_get(mi_option_purge_delay) / 8); // wait a tiny bit longer in case there is a series of free's
-      }
-    }
-    else {
-      // previous purge mask is not yet expired, increase the expiration by a bit.
-      segment->purge_expire += mi_option_get(mi_option_purge_extend_delay);
-    }
-  }
-}
-
-static void mi_segment_try_purge(mi_segment_t* segment, bool force) {
-  if (!segment->allow_purge || segment->purge_expire == 0 || mi_commit_mask_is_empty(&segment->purge_mask)) return;
-  mi_msecs_t now = _mi_clock_now();
-  if (!force && now < segment->purge_expire) return;
-
-  mi_commit_mask_t mask = segment->purge_mask;
-  segment->purge_expire = 0;
-  mi_commit_mask_create_empty(&segment->purge_mask);
-
-  size_t idx;
-  size_t count;
-  mi_commit_mask_foreach(&mask, idx, count) {
-    // if found, decommit that sequence
-    if (count > 0) {
-      uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE);
-      size_t size = count * MI_COMMIT_SIZE;
-      mi_segment_purge(segment, p, size);
-    }
-  }
-  mi_commit_mask_foreach_end()
-  mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask));
-}
-
-// called from `mi_heap_collect_ex`
-// this can be called per-page so it is important that try_purge has fast exit path
-void _mi_segment_collect(mi_segment_t* segment, bool force) {
-  mi_segment_try_purge(segment, force);
-}
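The removed mi_segment_try_purge walked the purge mask run by run via _mi_commit_mask_next_run. The single-word sketch below shows the run-iteration idea in isolation (the patch's version handles multiple mask fields; this is not that code):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint64_t mask = 0x0F00FF03ull;  // example: a few committed/purgeable ranges
  for (int i = 0; i < 64; ) {
    if ((mask >> i) & 1) {
      int start = i;
      while (i < 64 && ((mask >> i) & 1)) i++;  // extend the run of set bits
      printf("run: bits [%d, %d)\n", start, i); // one contiguous range
    }
    else i++;
  }
  return 0;
}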
-
-/* -----------------------------------------------------------
-   Span free
------------------------------------------------------------ */
-
-static bool mi_segment_is_abandoned(mi_segment_t* segment) {
-  return (mi_atomic_load_relaxed(&segment->thread_id) == 0);
-}
-
-// note: can be called on abandoned segments
-static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_purge, mi_segments_tld_t* tld) {
-  mi_assert_internal(slice_index < segment->slice_entries);
-  mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment)
-                          ? NULL : mi_span_queue_for(slice_count,tld));
-  if (slice_count==0) slice_count = 1;
-  mi_assert_internal(slice_index + slice_count - 1 < segment->slice_entries);
-
-  // set first and last slice (the intermediates can be undetermined)
-  mi_slice_t* slice = &segment->slices[slice_index];
-  slice->slice_count = (uint32_t)slice_count;
-  mi_assert_internal(slice->slice_count == slice_count); // no overflow?
-  slice->slice_offset = 0;
-  if (slice_count > 1) {
-    mi_slice_t* last = slice + slice_count - 1;
-    mi_slice_t* end  = (mi_slice_t*)mi_segment_slices_end(segment);
-    if (last > end) { last = end; }
-    last->slice_count = 0;
-    last->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1));
-    last->block_size = 0;
-  }
-
-  // perhaps decommit
-  if (allow_purge) {
-    mi_segment_schedule_purge(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE);
-  }
-
-  // and push it on the free page queue (if it was not a huge page)
-  if (sq != NULL) mi_span_queue_push( sq, slice );
-             else slice->block_size = 0; // mark huge page as free anyways
-}
-
-/*
-// called from reclaim to add existing free spans
-static void mi_segment_span_add_free(mi_slice_t* slice, mi_segments_tld_t* tld) {
-  mi_segment_t* segment = _mi_ptr_segment(slice);
-  mi_assert_internal(slice->xblock_size==0 && slice->slice_count>0 && slice->slice_offset==0);
-  size_t slice_index = mi_slice_index(slice);
-  mi_segment_span_free(segment,slice_index,slice->slice_count,tld);
-}
-*/
-
-static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld_t* tld) {
-  mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->block_size==0);
-  mi_assert_internal(_mi_ptr_segment(slice)->kind != MI_SEGMENT_HUGE);
-  mi_span_queue_t* sq = mi_span_queue_for(slice->slice_count, tld);
-  mi_span_queue_delete(sq, slice);
-}
-
-// note: can be called on abandoned segments
-static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) {
-  mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0);
-  mi_segment_t* const segment = _mi_ptr_segment(slice);
-
-  // for huge pages, just mark as free but don't add to the queues
-  if (segment->kind == MI_SEGMENT_HUGE) {
-    // issue #691: segment->used can be 0 if the huge page block was freed while abandoned (reclaim will get here in that case)
-    mi_assert_internal((segment->used==0 && slice->block_size==0) || segment->used == 1); // decreased right after this call in `mi_segment_page_clear`
-    slice->block_size = 0;  // mark as free anyways
-    // we should mark the last slice `xblock_size=0` now to maintain invariants but we skip it to
-    // avoid a possible cache miss (and the segment is about to be freed)
-    return slice;
-  }
-
-  // otherwise coalesce the span and add to the free span queues
-  const bool is_abandoned = (segment->thread_id == 0); // mi_segment_is_abandoned(segment);
-  size_t slice_count = slice->slice_count;
-  mi_slice_t* next = slice + slice->slice_count;
-  mi_assert_internal(next <= mi_segment_slices_end(segment));
-  if (next < mi_segment_slices_end(segment) && next->block_size==0) {
-    // free next block -- remove it from free and merge
-    mi_assert_internal(next->slice_count > 0 && next->slice_offset==0);
-    slice_count += next->slice_count; // extend
-    if (!is_abandoned) { mi_segment_span_remove_from_queue(next, tld); }
-  }
-  if (slice > segment->slices) {
-    mi_slice_t* prev = mi_slice_first(slice - 1);
-    mi_assert_internal(prev >= segment->slices);
-    if (prev->block_size==0) {
-      // free previous slice -- remove it from free and merge
-      mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0);
-      slice_count += prev->slice_count;
-      slice->slice_count = 0;
-      slice->slice_offset = (uint32_t)((uint8_t*)slice - (uint8_t*)prev); // set the slice offset for `segment_force_abandon` (in case the previous free block is very large).
-      if (!is_abandoned) { mi_segment_span_remove_from_queue(prev, tld); }
-      slice = prev;
-    }
-  }
-
-  // and add the new free page
-  mi_segment_span_free(segment, mi_slice_index(slice), slice_count, true, tld);
-  return slice;
-}
-
-
-
-/* -----------------------------------------------------------
-   Page allocation
------------------------------------------------------------ */
-
-// Note: may still return NULL if committing the memory failed
-static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count) {
-  mi_assert_internal(slice_index < segment->slice_entries);
-  mi_slice_t* const slice = &segment->slices[slice_index];
-  mi_assert_internal(slice->block_size==0 || slice->block_size==1);
-
-  // commit before changing the slice data
-  if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, 0, NULL), slice_count * MI_SEGMENT_SLICE_SIZE)) {
-    return NULL;  // commit failed!
-  }
-
-  // convert the slices to a page
-  slice->slice_offset = 0;
-  slice->slice_count = (uint32_t)slice_count;
-  mi_assert_internal(slice->slice_count == slice_count);
-  const size_t bsize = slice_count * MI_SEGMENT_SLICE_SIZE;
-  slice->block_size = bsize;
-  mi_page_t* page = mi_slice_to_page(slice);
-  mi_assert_internal(mi_page_block_size(page) == bsize);
-
-  // set slice back pointers for the first MI_MAX_SLICE_OFFSET_COUNT entries
-  size_t extra = slice_count-1;
-  if (extra > MI_MAX_SLICE_OFFSET_COUNT) extra = MI_MAX_SLICE_OFFSET_COUNT;
-  if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1;  // huge objects may have more slices than avaiable entries in the segment->slices
-
-  mi_slice_t* slice_next = slice + 1;
-  for (size_t i = 1; i <= extra; i++, slice_next++) {
-    slice_next->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i);
-    slice_next->slice_count = 0;
-    slice_next->block_size = 1;
-  }
-
-  // and also for the last one (if not set already) (the last one is needed for coalescing and for large alignments)
-  // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543)
-  mi_slice_t* last = slice + slice_count - 1;
-  mi_slice_t* end  = (mi_slice_t*)mi_segment_slices_end(segment);
-  if (last > end) last = end;
-  if (last > slice) {
-    last->slice_offset = (uint32_t)(sizeof(mi_slice_t) * (last - slice));
-    last->slice_count = 0;
-    last->block_size = 1;
-  }
-
-  // and initialize the page
-  page->is_committed = true;
-  page->is_huge = (segment->kind == MI_SEGMENT_HUGE);
-  segment->used++;
-  return page;
-}
-
-static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) {
-  mi_assert_internal(_mi_ptr_segment(slice) == segment);
-  mi_assert_internal(slice->slice_count >= slice_count);
-  mi_assert_internal(slice->block_size > 0); // no more in free queue
-  if (slice->slice_count <= slice_count) return;
-  mi_assert_internal(segment->kind != MI_SEGMENT_HUGE);
-  size_t next_index = mi_slice_index(slice) + slice_count;
-  size_t next_count = slice->slice_count - slice_count;
-  mi_segment_span_free(segment, next_index, next_count, false /* don't purge left-over part */, tld);
-  slice->slice_count = (uint32_t)slice_count;
-}
-
-static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) {
-  mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX);
-  // search from best fit up
-  mi_span_queue_t* sq = mi_span_queue_for(slice_count, tld);
-  if (slice_count == 0) slice_count = 1;
-  while (sq <= &tld->spans[MI_SEGMENT_BIN_MAX]) {
-    for (mi_slice_t* slice = sq->first; slice != NULL; slice = slice->next) {
-      if (slice->slice_count >= slice_count) {
-        // found one
-        mi_segment_t* segment = _mi_ptr_segment(slice);
-        if (_mi_arena_memid_is_suitable(segment->memid, req_arena_id)) {
-          // found a suitable page span
-          mi_span_queue_delete(sq, slice);
-
-          if (slice->slice_count > slice_count) {
-            mi_segment_slice_split(segment, slice, slice_count, tld);
-          }
-          mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->block_size > 0);
-          mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count);
-          if (page == NULL) {
-            // commit failed; return NULL but first restore the slice
-            mi_segment_span_free_coalesce(slice, tld);
-            return NULL;
-          }
-          return page;
-        }
-      }
-    }
-    sq++;
-  }
-  // could not find a page..
-  return NULL;
+  #endif
 }


@@ -829,223 +535,215 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) {
   Segment allocation
 ----------------------------------------------------------- */

-static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delayed, mi_arena_id_t req_arena_id,
-                                          size_t* psegment_slices, size_t* pinfo_slices,
-                                          bool commit, mi_segments_tld_t* tld)
-
+static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignment, mi_arena_id_t req_arena_id,
+                                         size_t pre_size, size_t info_size, bool commit, size_t segment_size,
+                                         mi_segments_tld_t* tld)
 {
   mi_memid_t memid;
   bool allow_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy
   size_t align_offset = 0;
-  size_t alignment = MI_SEGMENT_ALIGN;
-
+  size_t alignment = MI_SEGMENT_SIZE;
   if (page_alignment > 0) {
-    // mi_assert_internal(huge_page != NULL);
-    mi_assert_internal(page_alignment >= MI_SEGMENT_ALIGN);
     alignment = page_alignment;
-    const size_t info_size = (*pinfo_slices) * MI_SEGMENT_SLICE_SIZE;
-    align_offset = _mi_align_up( info_size, MI_SEGMENT_ALIGN );
-    const size_t extra = align_offset - info_size;
-    // recalculate due to potential guard pages
-    *psegment_slices = mi_segment_calculate_slices(required + extra, pinfo_slices);
-    mi_assert_internal(*psegment_slices > 0 && *psegment_slices <= UINT32_MAX);
+    align_offset = _mi_align_up(pre_size, MI_SEGMENT_SIZE);
+    segment_size = segment_size + (align_offset - pre_size);  // adjust the segment size
   }

-  const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE;
   mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, commit, allow_large, req_arena_id, &memid);
   if (segment == NULL) {
     return NULL;  // failed to allocate
   }

-  // ensure metadata part of the segment is committed
-  mi_commit_mask_t commit_mask;
-  if (memid.initially_committed) {
-    mi_commit_mask_create_full(&commit_mask);
-  }
-  else {
-    // at least commit the info slices
-    const size_t commit_needed = _mi_divide_up((*pinfo_slices)*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE);
-    mi_assert_internal(commit_needed>0);
-    mi_commit_mask_create(0, commit_needed, &commit_mask);
-    mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= (*pinfo_slices)*MI_SEGMENT_SLICE_SIZE);
-    if (!_mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, NULL)) {
-      _mi_arena_free(segment,segment_size,0,memid);
+  if (!memid.initially_committed) {
+    // ensure the initial info is committed
+    mi_assert_internal(!memid.is_pinned);
+    bool ok = _mi_os_commit(segment, pre_size, NULL);
+    if (!ok) {
+      // commit failed; we cannot touch the memory: free the segment directly and return `NULL`
+      _mi_arena_free(segment, segment_size, 0, memid);
       return NULL;
     }
   }
-  mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);
+  MI_UNUSED(info_size);
   segment->memid = memid;
   segment->allow_decommit = !memid.is_pinned;
   segment->allow_purge = segment->allow_decommit && (mi_option_get(mi_option_purge_delay) >= 0);
   segment->segment_size = segment_size;
   segment->subproc = tld->subproc;
-  segment->commit_mask = commit_mask;
-  segment->purge_expire = 0;
-  mi_commit_mask_create_empty(&segment->purge_mask);
-
   mi_segments_track_size((long)(segment_size), tld);
   _mi_segment_map_allocated_at(segment);
   return segment;
 }
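On the align_offset adjustment in the new mi_segment_os_alloc: when a page alignment is requested, the segment is grown by align_offset - pre_size so that the payload after the segment info lands on the requested boundary. Illustrative arithmetic with assumed values (not the patch's code):

#include <stddef.h>
#include <stdio.h>

#define SEGMENT_SIZE (4u << 20)

static size_t align_up(size_t x, size_t a) { return (x + a - 1) / a * a; }

int main(void) {
  size_t pre_size = 0x4000;                 // segment info + guard page(s)
  size_t segment_size = 16u << 20;          // requested (huge) size
  size_t align_offset = align_up(pre_size, SEGMENT_SIZE);
  segment_size += align_offset - pre_size;  // grow so the payload stays aligned
  printf("align_offset=%#zx total=%#zx\n", align_offset, segment_size);
  return 0;
}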
MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); - - // _mi_memzero(segment->slices, sizeof(mi_slice_t)*(info_slices+1)); - _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); - - // set up guard pages - size_t guard_slices = 0; - if (MI_SECURE>0) { - // in secure mode, we set up a protected page in between the segment info - // and the page data, and at the end of the segment. - size_t os_pagesize = _mi_os_page_size(); - _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize); - uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize; - mi_segment_ensure_committed(segment, end, os_pagesize); - _mi_os_protect(end, os_pagesize); - if (slice_entries == segment_slices) segment->slice_entries--; // don't use the last slice :-( - guard_slices = 1; - } - - // reserve first slices for segment info - mi_page_t* page0 = mi_segment_span_allocate(segment, 0, info_slices); - mi_assert_internal(page0!=NULL); if (page0==NULL) return NULL; // cannot fail as we always commit in advance - mi_assert_internal(segment->used == 1); - segment->used = 0; // don't count our internal slices towards usage - - // initialize initial free pages - if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page - mi_assert_internal(huge_page==NULL); - mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't purge */, tld); + size_t capacity; + if (page_kind == MI_PAGE_HUGE) { + mi_assert_internal(page_shift == MI_SEGMENT_SHIFT + 1 && required > 0); + capacity = 1; } else { - mi_assert_internal(huge_page!=NULL); - mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask)); - mi_assert_internal(mi_commit_mask_is_full(&segment->commit_mask)); - *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices); - mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance + mi_assert_internal(required == 0 && page_alignment == 0); + size_t page_size = (size_t)1 << page_shift; + capacity = MI_SEGMENT_SIZE / page_size; + mi_assert_internal(MI_SEGMENT_SIZE % page_size == 0); + mi_assert_internal(capacity >= 1 && capacity <= MI_SMALL_PAGES_PER_SEGMENT); + } + size_t info_size; + size_t pre_size; + const size_t init_segment_size = mi_segment_calculate_sizes(capacity, required, &pre_size, &info_size); + mi_assert_internal(init_segment_size >= required); + + // Initialize parameters + const bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && // don't delay for large objects + // !_mi_os_has_overcommit() && // never delay on overcommit systems + _mi_current_thread_count() > 1 && // do not delay for the first N threads + tld->peak_count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); + const bool init_commit = eager; // || (page_kind >= MI_PAGE_LARGE); + + // Allocate the segment from the OS (segment_size can change due to alignment) + mi_segment_t* segment = mi_segment_os_alloc(eager_delayed, page_alignment, req_arena_id, pre_size, info_size, init_commit, init_segment_size, tld); + if (segment == NULL) return NULL; + mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); + mi_assert_internal(segment->memid.is_pinned ? 
segment->memid.initially_committed : true); + + // zero the segment info (but not the `mem` fields) + ptrdiff_t ofs = offsetof(mi_segment_t, next); + _mi_memzero((uint8_t*)segment + ofs, info_size - ofs); + + // initialize pages info + const bool is_huge = (page_kind == MI_PAGE_HUGE); + for (size_t i = 0; i < capacity; i++) { + mi_assert_internal(i <= 255); + segment->pages[i].segment_idx = (uint8_t)i; + segment->pages[i].is_committed = segment->memid.initially_committed; + segment->pages[i].is_zero_init = segment->memid.initially_zero; + segment->pages[i].is_huge = is_huge; + } + + // initialize + segment->page_kind = page_kind; + segment->capacity = capacity; + segment->page_shift = page_shift; + segment->segment_info_size = pre_size; + segment->thread_id = _mi_thread_id(); + segment->cookie = _mi_ptr_cookie(segment); + + // set protection + mi_segment_protect(segment, true); + + // insert in free lists for small and medium pages + if (page_kind <= MI_PAGE_MEDIUM) { + mi_segment_insert_in_free_queue(segment, tld); } - mi_assert_expensive(mi_segment_is_valid(segment,tld)); return segment; } static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { MI_UNUSED(force); - mi_assert_internal(segment != NULL); - mi_assert_internal(segment->next == NULL); - mi_assert_internal(segment->used == 0); + mi_assert(segment != NULL); // in `mi_segment_force_abandon` we set this to true to ensure the segment's memory stays valid if (segment->dont_free) return; - // Remove the free pages - mi_slice_t* slice = &segment->slices[0]; - const mi_slice_t* end = mi_segment_slices_end(segment); - #if MI_DEBUG>1 - size_t page_count = 0; - #endif - while (slice < end) { - mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); - mi_assert_internal(mi_slice_index(slice)==0 || slice->block_size == 0); // no more used pages .. 
- if (slice->block_size == 0 && segment->kind != MI_SEGMENT_HUGE) { - mi_segment_span_remove_from_queue(slice, tld); - } - #if MI_DEBUG>1 - page_count++; - #endif - slice = slice + slice->slice_count; - } - mi_assert_internal(page_count == 2); // first page is allocated by the segment itself + // don't purge as we are freeing now + mi_segment_remove_all_purges(segment, false /* don't force as we are about to free */, tld); + mi_segment_remove_from_free_queue(segment, tld); - // stats - // _mi_stat_decrease(&tld->stats->page_committed, mi_segment_info_size(segment)); + mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); + mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); + mi_assert(segment->next == NULL); + mi_assert(segment->prev == NULL); + // _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); // return it to the OS - mi_segment_os_free(segment, tld); + mi_segment_os_free(segment, segment->segment_size, tld); +} + +/* ----------------------------------------------------------- + Free page management inside a segment +----------------------------------------------------------- */ + + +static bool mi_segment_has_free(const mi_segment_t* segment) { + return (segment->used < segment->capacity); +} + +static bool mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(_mi_page_segment(page) == segment); + mi_assert_internal(!page->segment_in_use); + mi_page_purge_remove(page, tld); + + // check commit + if (!mi_page_ensure_committed(segment, page, tld)) return false; + + // set in-use before doing unreset to prevent delayed reset + page->segment_in_use = true; + segment->used++; + mi_assert_internal(page->segment_in_use && page->is_committed && page->used==0 && !mi_pages_purge_contains(page,tld)); + mi_assert_internal(segment->used <= segment->capacity); + if (segment->used == segment->capacity && segment->page_kind <= MI_PAGE_MEDIUM) { + // if no more free pages, remove from the queue + mi_assert_internal(!mi_segment_has_free(segment)); + mi_segment_remove_from_free_queue(segment, tld); + } + return true; } /* ----------------------------------------------------------- - Page Free + Free ----------------------------------------------------------- */ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -// note: can be called on abandoned pages -static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) { - mi_assert_internal(page->block_size > 0); +// clear page data; can be called on abandoned segments +static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) +{ + mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); - mi_segment_t* segment = _mi_ptr_segment(page); - mi_assert_internal(segment->used > 0); + mi_assert_internal(page->is_committed); + mi_assert_internal(mi_page_not_in_queue(page, tld)); size_t inuse = page->capacity * mi_page_block_size(page); _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - _mi_stat_decrease(&tld->stats->page_bins[_mi_page_bin(page)], 1); - // reset the page memory to reduce memory pressure? 
- if (segment->allow_decommit && mi_option_is_enabled(mi_option_deprecated_page_reset)) { - size_t psize; - uint8_t* start = _mi_segment_page_start(segment, page, &psize); - _mi_os_reset(start, psize); - } - - // zero the page data, but not the segment fields and heap tag page->is_zero_init = false; - uint8_t heap_tag = page->heap_tag; - ptrdiff_t ofs = offsetof(mi_page_t, capacity); - _mi_memzero((uint8_t*)page + ofs, sizeof(*page) - ofs); - page->block_size = 1; - page->heap_tag = heap_tag; + page->segment_in_use = false; - // and free it - mi_slice_t* slice = mi_segment_span_free_coalesce(mi_page_to_slice(page), tld); + // zero the page data, but not the segment fields and capacity, page start, and block_size (for page size calculations) + size_t block_size = page->block_size; + uint8_t block_size_shift = page->block_size_shift; + uint8_t heap_tag = page->heap_tag; + uint8_t* page_start = page->page_start; + uint16_t capacity = page->capacity; + uint16_t reserved = page->reserved; + ptrdiff_t ofs = offsetof(mi_page_t,capacity); + _mi_memzero((uint8_t*)page + ofs, sizeof(*page) - ofs); + page->capacity = capacity; + page->reserved = reserved; + page->block_size = block_size; + page->block_size_shift = block_size_shift; + page->heap_tag = heap_tag; + page->page_start = page_start; segment->used--; - // cannot assert segment valid as it is called during reclaim - // mi_assert_expensive(mi_segment_is_valid(segment, tld)); - return slice; + + // schedule purge + mi_segment_schedule_purge(segment, page, tld); + + page->capacity = 0; // after purge these can be zero'd now + page->reserved = 0; } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -1053,22 +751,27 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment,tld)); + mi_pages_try_purge(false /*force?*/, tld); // mark it as free now - mi_segment_page_clear(page, tld); - mi_assert_expensive(mi_segment_is_valid(segment, tld)); + mi_segment_page_clear(segment, page, tld); if (segment->used == 0) { // no more used pages; remove from the free list and free the segment mi_segment_free(segment, force, tld); } - else if (segment->used == segment->abandoned) { - // only abandoned pages; remove from free list and abandon - mi_segment_abandon(segment,tld); - } else { - // perform delayed purges - mi_segment_try_purge(segment, false /* force? */); + if (segment->used == segment->abandoned) { + // only abandoned pages; remove from free list and abandon + mi_segment_abandon(segment,tld); + } + else if (segment->used + 1 == segment->capacity) { + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); // large and huge pages are always the single page in a segment + if (segment->page_kind <= MI_PAGE_MEDIUM) { + // move back to segments free list + mi_segment_insert_in_free_queue(segment,tld); + } + } } } @@ -1077,7 +780,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) Abandonment When threads terminate, they can leave segments with -live blocks (reachable through other threads). Such segments +live blocks (reached through other threads). Such segments are "abandoned" and will be reclaimed by other threads to reuse their pages and/or free them eventually. The `thread_id` of such segments is 0. 
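The scenario described above is ordinary cross-thread lifetime; a minimal hedged sketch using only the public mimalloc API (the pthread scaffolding is illustrative only, not part of this diff): a block allocated by a thread that has exited stays valid, and a later free from another thread goes through the abandoned-segment path.

#include <pthread.h>
#include <string.h>
#include <mimalloc.h>

static void* producer(void* arg) {
  char** slot = (char**)arg;
  *slot = (char*)mi_malloc(64);          // allocated in a segment owned by this thread
  strcpy(*slot, "from another thread");
  return NULL;                           // thread exits; its segment can be abandoned
}

int main(void) {
  char* p = NULL;
  pthread_t t;
  pthread_create(&t, NULL, producer, &p);
  pthread_join(t, NULL);
  // the allocating thread is gone (thread_id == 0): this free may trigger
  // the abandoned-segment machinery to reclaim or free the segment
  mi_free(p);
  return 0;
}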
@@ -1098,33 +801,22 @@ by scanning the arena memory static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); - mi_assert_internal(segment->abandoned_visits == 0); - mi_assert_expensive(mi_segment_is_valid(segment,tld)); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); - // remove the free pages from the free page queues - mi_slice_t* slice = &segment->slices[0]; - const mi_slice_t* end = mi_segment_slices_end(segment); - while (slice < end) { - mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); - if (slice->block_size == 0) { // a free page - mi_segment_span_remove_from_queue(slice,tld); - slice->block_size = 0; // but keep it free - } - slice = slice + slice->slice_count; - } + // Potentially force purge. Only abandoned segments in arena memory can be + // reclaimed without a free so if a segment is not from an arena we force purge here to be conservative. + mi_pages_try_purge(false /*force?*/,tld); + const bool force_purge = (segment->memid.memkind != MI_MEM_ARENA) || mi_option_is_enabled(mi_option_abandoned_page_purge); + mi_segment_remove_all_purges(segment, force_purge, tld); - // perform delayed decommits (forcing is much slower on mstress) - // Only abandoned segments in arena memory can be reclaimed without a free - // so if a segment is not from an arena we force purge here to be conservative. - const bool force_purge = (segment->memid.memkind != MI_MEM_ARENA) || mi_option_is_enabled(mi_option_abandoned_page_purge); - mi_segment_try_purge(segment, force_purge); + // remove the segment from the free page queue if needed + mi_segment_remove_from_free_queue(segment, tld); + mi_assert_internal(segment->next == NULL && segment->prev == NULL); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); - mi_segments_track_size(-((long)mi_segment_size(segment)), tld); - segment->thread_id = 0; - segment->abandoned_visits = 1; // from 0 to 1 to signify it is abandoned + mi_segments_track_size(-((long)segment->segment_size), tld); + segment->abandoned_visits = 0; if (segment->was_reclaimed) { tld->reclaim_count--; segment->was_reclaimed = false; @@ -1137,10 +829,9 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); mi_assert_internal(mi_page_heap(page) == NULL); mi_segment_t* segment = _mi_page_segment(page); - - mi_assert_expensive(mi_segment_is_valid(segment,tld)); + mi_assert_expensive(!mi_pages_purge_contains(page, tld)); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); segment->abandoned++; - _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); if (segment->used == segment->abandoned) { @@ -1153,40 +844,24 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { Reclaim abandoned pages ----------------------------------------------------------- */ -static mi_slice_t* mi_slices_start_iterate(mi_segment_t* segment, const mi_slice_t** end) { - mi_slice_t* slice = &segment->slices[0]; - *end = mi_segment_slices_end(segment); - mi_assert_internal(slice->slice_count>0 && slice->block_size>0); // segment allocated page - slice = slice + slice->slice_count; // skip the first segment allocated page - return slice; -} - -// Possibly free pages and check if free space is available 
-static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, size_t block_size, mi_segments_tld_t* tld) +// Possibly clear pages and check if free space is available +static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool* all_pages_free) { - mi_assert_internal(mi_segment_is_abandoned(segment)); + mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0); bool has_page = false; - - // for all slices - const mi_slice_t* end; - mi_slice_t* slice = mi_slices_start_iterate(segment, &end); - while (slice < end) { - mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); - if (mi_slice_is_used(slice)) { // used page + size_t pages_used = 0; + size_t pages_used_empty = 0; + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->segment_in_use) { + pages_used++; // ensure used count is up to date and collect potential concurrent frees - mi_page_t* const page = mi_slice_to_page(slice); _mi_page_free_collect(page, false); if (mi_page_all_free(page)) { - // if this page is all free now, free it without adding to any queues (yet) - mi_assert_internal(page->next == NULL && page->prev==NULL); - _mi_stat_decrease(&tld->stats->pages_abandoned, 1); - segment->abandoned--; - slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce! - mi_assert_internal(!mi_slice_is_used(slice)); - if (slice->slice_count >= slices_needed) { - has_page = true; - } + // if everything free already, page can be reused for some block size + // note: don't clear the page yet as we can only OS reset it once it is reclaimed + pages_used_empty++; + has_page = true; } else if (mi_page_block_size(page) == block_size && mi_page_has_any_available(page)) { // a page has available free blocks of the right size @@ -1194,17 +869,19 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s } } else { - // empty span - if (slice->slice_count >= slices_needed) { - has_page = true; - } + // whole empty page + has_page = true; } - slice = slice + slice->slice_count; + } + mi_assert_internal(pages_used == segment->used && pages_used >= pages_used_empty); + if (all_pages_free != NULL) { + *all_pages_free = ((pages_used - pages_used_empty) == 0); } return has_page; } -// Reclaim an abandoned segment; returns NULL if the segment was freed + +// Reclaim a segment; returns NULL if the segment was freed // set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full. 
static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) { if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; } @@ -1215,25 +892,21 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, segment->abandoned_visits = 0; segment->was_reclaimed = true; tld->reclaim_count++; - mi_segments_track_size((long)mi_segment_size(segment), tld); - mi_assert_internal(segment->next == NULL); + mi_segments_track_size((long)segment->segment_size, tld); + mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); _mi_stat_decrease(&tld->stats->segments_abandoned, 1); - // for all slices - const mi_slice_t* end; - mi_slice_t* slice = mi_slices_start_iterate(segment, &end); - while (slice < end) { - mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); - if (mi_slice_is_used(slice)) { - // in use: reclaim the page in our heap - mi_page_t* page = mi_slice_to_page(slice); + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->segment_in_use) { mi_assert_internal(page->is_committed); + mi_assert_internal(mi_page_not_in_queue(page, tld)); mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); mi_assert_internal(mi_page_heap(page) == NULL); - mi_assert_internal(page->next == NULL && page->prev==NULL); - _mi_stat_decrease(&tld->stats->pages_abandoned, 1); segment->abandoned--; + mi_assert(page->next == NULL); + _mi_stat_decrease(&tld->stats->pages_abandoned, 1); // get the target heap for this thread which has a matching heap tag (so we reclaim into a matching heap) mi_heap_t* target_heap = _mi_heap_by_tag(heap, page->heap_tag); // allow custom heaps to separate objects if (target_heap == NULL) { @@ -1245,8 +918,8 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) _mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { - // if everything free by now, free the page - slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing + // if everything free already, clear the page directly + mi_segment_page_clear(segment, page, tld); // reset is ok now } else { // otherwise reclaim it into the heap @@ -1256,22 +929,23 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } } } - else { - // the span is free, add it to our page queues - slice = mi_segment_span_free_coalesce(slice, tld); // set slice again due to coalesceing + /* expired + else if (page->is_committed) { // not in-use, and not reset yet + // note: do not reset as this includes pages that were not touched before + // mi_pages_purge_add(segment, page, tld); } - mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0); - slice = slice + slice->slice_count; + */ } - - mi_assert(segment->abandoned == 0); - mi_assert_expensive(mi_segment_is_valid(segment, tld)); - if (segment->used == 0) { // due to page_clear + mi_assert_internal(segment->abandoned == 0); + if (segment->used == 0) { mi_assert_internal(right_page_reclaimed == NULL || !(*right_page_reclaimed)); mi_segment_free(segment, false, tld); return NULL; } else { + if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { + 
mi_segment_insert_in_free_queue(segment, tld); + } return segment; } } @@ -1328,7 +1002,7 @@ static long mi_segment_get_reclaim_tries(mi_segments_tld_t* tld) { return max_tries; } -static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slices, size_t block_size, bool* reclaimed, mi_segments_tld_t* tld) +static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, bool* reclaimed, mi_segments_tld_t* tld) { *reclaimed = false; long max_tries = mi_segment_get_reclaim_tries(tld); @@ -1346,8 +1020,9 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments and use many tries // Perhaps we can skip non-suitable ones in a better way? bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid); - bool has_page = mi_segment_check_free(segment,needed_slices,block_size,tld); // try to free up pages (due to concurrent frees) - if (segment->used == 0) { + bool all_pages_free; + bool has_page = mi_segment_check_free(segment,block_size,&all_pages_free); // try to free up pages (due to concurrent frees) + if (all_pages_free) { // free the segment (by forced reclaim) to make it available to other threads. // note1: we prefer to free a segment as that might lead to reclaiming another // segment that is still partially used. // note2: we could in principle optimize this by skipping reclaim and directly // freeing but that would violate some invariants temporarily) mi_segment_reclaim(segment, heap, 0, NULL, tld); } - else if (has_page && is_suitable) { - // found a large enough free span, or a page of the right block_size with free space + else if (has_page && segment->page_kind == page_kind && is_suitable) { + // found a free page of the right kind, or page of the right block_size with free space // we return the result of reclaim (which is usually `segment`) as it might free // the segment due to concurrent frees (in which case `NULL` is returned). result = mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); @@ -1367,9 +1042,9 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { - // otherwise, push on the visited list so it gets not looked at too quickly again + // otherwise, mark it back as abandoned + // todo: reset delayed pages in the segment? max_tries++; // don't count this as a try since it was not suitable - mi_segment_try_purge(segment, false /* true force? */); // force purge if needed as we may not visit soon again _mi_arena_segment_mark_abandoned(segment); } } @@ -1377,72 +1052,38 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice return result; } -// collect abandoned segments -void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) -{ - mi_segment_t* segment; - mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, tld->subproc, force /* blocking? */, &current); - long max_tries = (force ? (long)mi_atomic_load_relaxed(&tld->subproc->abandoned_count) : 1024); // limit latency - while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL)) { - mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees) - if (segment->used == 0) { - // free the segment (by forced reclaim) to make it available to other threads.
- // note: we could in principle optimize this by skipping reclaim and directly - // freeing but that would violate some invariants temporarily) - mi_segment_reclaim(segment, heap, 0, NULL, tld); - } - else { - // otherwise, purge if needed and push on the visited list - // note: forced purge can be expensive if many threads are destroyed/created as in mstress. - mi_segment_try_purge(segment, force); - _mi_arena_segment_mark_abandoned(segment); - } - } - _mi_arena_field_cursor_done(&current); -} /* ----------------------------------------------------------- - Force abandon a segment that is in use by our thread + Force abandon a segment that is in use by our thread ----------------------------------------------------------- */ // force abandon a segment static void mi_segment_force_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(!mi_segment_is_abandoned(segment)); + mi_assert_internal(segment->abandoned < segment->used); mi_assert_internal(!segment->dont_free); // ensure the segment does not get free'd underneath us (so we can check if a page has been freed in `mi_page_force_abandon`) segment->dont_free = true; - // for all slices - const mi_slice_t* end; - mi_slice_t* slice = mi_slices_start_iterate(segment, &end); - while (slice < end) { - mi_assert_internal(slice->slice_count > 0); - mi_assert_internal(slice->slice_offset == 0); - if (mi_slice_is_used(slice)) { - // ensure used count is up to date and collect potential concurrent frees - mi_page_t* const page = mi_slice_to_page(slice); - _mi_page_free_collect(page, false); - { - // abandon the page if it is still in-use (this will free it if possible as well) - mi_assert_internal(segment->used > 0); - if (segment->used == segment->abandoned+1) { - // the last page.. abandon and return as the segment will be abandoned after this - // and we should no longer access it. - segment->dont_free = false; - _mi_page_force_abandon(page); - return; - } - else { - // abandon and continue - _mi_page_force_abandon(page); - // it might be freed, reset the slice (note: relies on coalesce setting the slice_offset) - slice = mi_slice_first(slice); - } + // for all pages + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->segment_in_use) { + // abandon the page if it is still in-use (this will free the page if possible as well (but not our segment)) + mi_assert_internal(segment->used > 0); + if (segment->used == segment->abandoned+1) { + // the last page.. abandon and return as the segment will be abandoned after this + // and we should no longer access it. + segment->dont_free = false; + _mi_page_force_abandon(page); + return; + } + else { + // abandon and continue + _mi_page_force_abandon(page); } } - slice = slice + slice->slice_count; } segment->dont_free = false; mi_assert(segment->used == segment->abandoned); @@ -1453,7 +1094,7 @@ static void mi_segment_force_abandon(mi_segment_t* segment, mi_segments_tld_t* t } else { // perform delayed purges - mi_segment_try_purge(segment, false /* force? */); + mi_pages_try_purge(false /* force?
*/, tld); } } @@ -1466,7 +1107,7 @@ static void mi_segments_try_abandon_to_target(mi_heap_t* heap, size_t target, mi // todo: we should maintain a list of segments per thread; for now, only consider segments from the heap full pages for (int i = 0; i < 64 && tld->count >= min_target; i++) { mi_page_t* page = heap->pages[MI_BIN_FULL].first; - while (page != NULL && mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX) { + while (page != NULL && mi_page_is_huge(page)) { page = page->next; } if (page==NULL) { @@ -1502,8 +1143,9 @@ void mi_collect_reduce(size_t target_size) mi_attr_noexcept { Reclaim or allocate ----------------------------------------------------------- */ -static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld) +static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld) { + mi_assert_internal(page_kind <= MI_PAGE_LARGE); mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX); // try to abandon some segments to increase reuse between threads @@ -1511,83 +1153,126 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_ // 1. try to reclaim an abandoned segment bool reclaimed; - mi_segment_t* segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld); + mi_segment_t* segment = mi_segment_try_reclaim(heap, block_size, page_kind, &reclaimed, tld); + mi_assert_internal(segment == NULL || _mi_arena_memid_is_suitable(segment->memid, heap->arena_id)); if (reclaimed) { // reclaimed the right page right into the heap - mi_assert_internal(segment != NULL); + mi_assert_internal(segment != NULL && segment->page_kind == page_kind && page_kind <= MI_PAGE_LARGE); return NULL; // pretend out-of-memory as the page will be in the page queue of the heap with available blocks } else if (segment != NULL) { - // reclaimed a segment with a large enough empty span in it + // reclaimed a segment with empty pages (of `page_kind`) in it return segment; } // 2. otherwise allocate a fresh segment - return mi_segment_alloc(0, 0, heap->arena_id, tld, NULL); + return mi_segment_alloc(0, page_kind, page_shift, 0, heap->arena_id, tld); } /* ----------------------------------------------------------- - Page allocation + Small page allocation ----------------------------------------------------------- */ -static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_kind, size_t required, size_t block_size, mi_segments_tld_t* tld) -{ - mi_assert_internal(required <= MI_LARGE_OBJ_SIZE_MAX && page_kind <= MI_PAGE_LARGE); - - // find a free page - size_t page_size = _mi_align_up(required, (required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); - size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; - mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size); - mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, heap->arena_id, tld); //(required <= MI_SMALL_SIZE_MAX ? 
0 : slices_needed), tld); - if (page==NULL) { - // no free page, allocate a new segment and try again - if (mi_segment_reclaim_or_alloc(heap, slices_needed, block_size, tld) == NULL) { - // OOM or reclaimed a good page in the heap - return NULL; - } - else { - // otherwise try again - return mi_segments_page_alloc(heap, page_kind, required, block_size, tld); +static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_assert_internal(mi_segment_has_free(segment)); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); + for (size_t i = 0; i < segment->capacity; i++) { // TODO: use a bitmap instead of search? + mi_page_t* page = &segment->pages[i]; + if (!page->segment_in_use) { + bool ok = mi_segment_page_claim(segment, page, tld); + if (ok) return page; } } - mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); - mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); - mi_segment_try_purge(_mi_ptr_segment(page), false); + mi_assert(false); + return NULL; +} + +// Allocate a page inside a segment. Requires that the segment has free pages +static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_assert_internal(mi_segment_has_free(segment)); + return mi_segment_find_free(segment, tld); +} + +static mi_page_t* mi_segment_page_try_alloc_in_queue(mi_heap_t* heap, mi_page_kind_t kind, mi_segments_tld_t* tld) { + // find an available segment in the segment free queue + mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld); + for (mi_segment_t* segment = free_queue->first; segment != NULL; segment = segment->next) { + if (_mi_arena_memid_is_suitable(segment->memid, heap->arena_id) && mi_segment_has_free(segment)) { + return mi_segment_page_alloc_in(segment, tld); + } + } + return NULL; +} + +static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld) { + mi_page_t* page = mi_segment_page_try_alloc_in_queue(heap, kind, tld); + if (page == NULL) { + // possibly allocate or reclaim a fresh segment + mi_segment_t* const segment = mi_segment_reclaim_or_alloc(heap, block_size, kind, page_shift, tld); + if (segment == NULL) return NULL; // return NULL if out-of-memory (or reclaimed) + mi_assert_internal(segment->page_kind==kind); + mi_assert_internal(segment->used < segment->capacity); + mi_assert_internal(_mi_arena_memid_is_suitable(segment->memid, heap->arena_id)); + page = mi_segment_page_try_alloc_in_queue(heap, kind, tld); // this should now succeed + } + mi_assert_internal(page != NULL); + #if MI_DEBUG>=2 && !MI_TRACK_ENABLED // && !MI_TSAN + // verify it is committed + mi_segment_raw_page_start(_mi_page_segment(page), page, NULL)[0] = 0; + #endif return page; } +static mi_page_t* mi_segment_small_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld) { + return mi_segment_page_alloc(heap, block_size, MI_PAGE_SMALL,MI_SMALL_PAGE_SHIFT,tld); +} +static mi_page_t* mi_segment_medium_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld) { + return mi_segment_page_alloc(heap, block_size, MI_PAGE_MEDIUM, MI_MEDIUM_PAGE_SHIFT, tld); +} /* ----------------------------------------------------------- - Huge page allocation + Large page allocation ----------------------------------------------------------- */ +static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld) { + mi_segment_t* segment = 
mi_segment_reclaim_or_alloc(heap,block_size,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld); + if (segment == NULL) return NULL; + mi_page_t* page = mi_segment_find_free(segment, tld); + mi_assert_internal(page != NULL); +#if MI_DEBUG>=2 && !MI_TRACK_ENABLED // && !MI_TSAN + mi_segment_raw_page_start(segment, page, NULL)[0] = 0; +#endif + return page; +} + static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) { - mi_page_t* page = NULL; - mi_segment_t* segment = mi_segment_alloc(size,page_alignment,req_arena_id,tld,&page); - if (segment == NULL || page==NULL) return NULL; - mi_assert_internal(segment->used==1); - mi_assert_internal(mi_page_block_size(page) >= size); + mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT + 1, page_alignment, req_arena_id, tld); + if (segment == NULL) return NULL; + mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= size); #if MI_HUGE_PAGE_ABANDON - segment->thread_id = 0; // huge segments are immediately abandoned + segment->thread_id = 0; // huge pages are immediately abandoned + mi_segments_track_size(-(long)segment->segment_size, tld); #endif + mi_page_t* page = mi_segment_find_free(segment, tld); + mi_assert_internal(page != NULL); + mi_assert_internal(page->is_huge); // for huge pages we initialize the block_size as we may // overallocate to accommodate large alignments. size_t psize; - uint8_t* start = _mi_segment_page_start(segment, page, &psize); + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); page->block_size = psize; - mi_assert_internal(page->is_huge); - // decommit the part of the prefix of a page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE) - if (page_alignment > 0 && segment->allow_decommit) { + // reset the part of the page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE) + if (page_alignment > 0 && segment->allow_decommit && page->is_committed) { uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment); mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); mi_assert_internal(psize - (aligned_p - start) >= size); - uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list + uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list ptrdiff_t decommit_size = aligned_p - decommit_start; - _mi_os_reset(decommit_start, decommit_size); // note: cannot use segment_decommit on huge segments + _mi_os_reset(decommit_start, decommit_size); // do not decommit as it may be in a region } return page; @@ -1597,7 +1282,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, // free huge block from another thread void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { // huge page segments are always abandoned and can be freed immediately by any thread - mi_assert_internal(segment->kind==MI_SEGMENT_HUGE); + mi_assert_internal(segment->page_kind==MI_PAGE_HUGE); mi_assert_internal(segment == _mi_page_segment(page)); mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id)==0); @@ -1612,6 +1297,7 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block page->is_zero_init = false; mi_assert(page->used == 0); mi_tld_t* tld = heap->tld; + mi_segments_track_size((long)segment->segment_size, &tld->segments); _mi_segment_page_free(page, true, &tld->segments); } 
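At the API level, alignments above MI_BLOCK_ALIGNMENT_MAX are what route into this huge-page path; a hedged sketch below (the 8 MiB alignment is an assumed value, chosen only to exceed MI_BLOCK_ALIGNMENT_MAX on a default build):

#include <assert.h>
#include <stdint.h>
#include <mimalloc.h>

int main(void) {
  const size_t align = 8 * 1024 * 1024;     // assumed to exceed MI_BLOCK_ALIGNMENT_MAX
  void* p = mi_malloc_aligned(100, align);  // expected to take the huge-page path above
  assert(p != NULL && ((uintptr_t)p % align) == 0);
  mi_free(p);                               // huge pages may be freed from any thread
  return 0;
}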
#if (MI_DEBUG!=0) @@ -1624,47 +1310,50 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block #else // reset memory of a huge block from another thread void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { - MI_UNUSED(page); - mi_assert_internal(segment->kind == MI_SEGMENT_HUGE); + mi_assert_internal(segment->page_kind == MI_PAGE_HUGE); mi_assert_internal(segment == _mi_page_segment(page)); mi_assert_internal(page->used == 1); // this is called just before the free mi_assert_internal(page->free == NULL); - if (segment->allow_decommit) { - size_t csize = mi_usable_size(block); - if (csize > sizeof(mi_block_t)) { - csize = csize - sizeof(mi_block_t); + if (segment->allow_decommit && page->is_committed) { + size_t usize = mi_usable_size(block); + if (usize > sizeof(mi_block_t)) { + usize = usize - sizeof(mi_block_t); uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); - _mi_os_reset(p, csize); // note: cannot use segment_decommit on huge segments + _mi_os_reset(p, usize); } } } #endif /* ----------------------------------------------------------- - Page allocation and free + Page allocation ----------------------------------------------------------- */ + mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld) { mi_page_t* page; if mi_unlikely(page_alignment > MI_BLOCK_ALIGNMENT_MAX) { mi_assert_internal(_mi_is_power_of_two(page_alignment)); mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE); + //mi_assert_internal((MI_SEGMENT_SIZE % page_alignment) == 0); if (page_alignment < MI_SEGMENT_SIZE) { page_alignment = MI_SEGMENT_SIZE; } - page = mi_segment_huge_page_alloc(block_size,page_alignment,heap->arena_id,tld); + page = mi_segment_huge_page_alloc(block_size, page_alignment, heap->arena_id, tld); } else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { - page = mi_segments_page_alloc(heap,MI_PAGE_SMALL,block_size,block_size,tld); + page = mi_segment_small_page_alloc(heap, block_size, tld); } else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { - page = mi_segments_page_alloc(heap,MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,block_size,tld); + page = mi_segment_medium_page_alloc(heap, block_size, tld); } - else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { - page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld); + else if (block_size <= MI_LARGE_OBJ_SIZE_MAX /* || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t)) */ ) { + page = mi_segment_large_page_alloc(heap, block_size, tld); } else { - page = mi_segment_huge_page_alloc(block_size,page_alignment,heap->arena_id,tld); + page = mi_segment_huge_page_alloc(block_size, page_alignment, heap->arena_id, tld); } - mi_assert_internal(page == NULL || _mi_heap_memid_is_suitable(heap, _mi_page_segment(page)->memid)); mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); + mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); + // mi_segment_try_purge(tld); + mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); mi_assert_internal(page == NULL || _mi_page_segment(page)->subproc == tld->subproc); return page; } @@ -1687,16 +1376,13 @@ static bool mi_segment_visit_page(mi_page_t* page, bool visit_blocks, mi_block_v } bool _mi_segment_visit_blocks(mi_segment_t* segment, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { - const mi_slice_t* end; - mi_slice_t* slice = mi_slices_start_iterate(segment, &end); - while (slice < end) { - if (mi_slice_is_used(slice)) { - mi_page_t* const page = mi_slice_to_page(slice); + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* const page = &segment->pages[i]; + if (page->segment_in_use) { if (heap_tag < 0 || (int)page->heap_tag == heap_tag) { if (!mi_segment_visit_page(page, visit_blocks, visitor, arg)) return false; } } - slice = slice + slice->slice_count; } return true; } diff --git a/src/stats.c b/src/stats.c index 34b3d4e4..1cfc3104 100644 --- a/src/stats.c +++ b/src/stats.c @@ -30,7 +30,6 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { { // add atomically (for abandoned pages) int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); - // if (stat == &_mi_stats_main.committed) { mi_assert_internal(current + amount >= 0); }; mi_atomic_maxi64_relaxed(&stat->peak, current + amount); if (amount > 0) { mi_atomic_addi64_relaxed(&stat->total,amount); @@ -62,25 +61,6 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { } -static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount) { - if (amount == 0) return; - if mi_unlikely(mi_is_in_main(stat)) - { - // adjust atomically - mi_atomic_addi64_relaxed(&stat->current, amount); - mi_atomic_addi64_relaxed(&stat->total,amount); - } - else { - // adjust local - stat->current += amount; - stat->total += amount; - } -} - -void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount) { - mi_stat_adjust(stat, -((int64_t)amount)); -} - // must be thread safe as it is called from stats_merge static void mi_stat_count_add_mt(mi_stat_count_t* stat, const mi_stat_count_t* src) { @@ -114,8 +94,8 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { } #endif for (size_t i = 0; i <= MI_BIN_HUGE; i++) { - mi_stat_count_add_mt(&stats->page_bins[i], &src->page_bins[i]); - } + mi_stat_count_add_mt(&stats->page_bins[i], &src->page_bins[i]); + } } #undef MI_STAT_COUNT @@ -218,15 +198,6 @@ static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int _mi_fprintf(out, arg, "\n"); } -#if MI_STAT>1 -static void mi_stat_total_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { - _mi_fprintf(out, arg, "%10s:", msg); - _mi_fprintf(out, arg, "%12s", " "); // no peak - mi_print_amount(stat->total, unit, out, arg); - _mi_fprintf(out, arg, "\n"); -} -#endif - static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); mi_print_amount(stat->total, -1, out, arg); @@ -243,7 +214,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* static void mi_print_header(mi_output_fun* out, void* arg ) { - _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "current ", "block ", "total# "); + _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "current 
", "unit ", "total# "); } #if MI_STAT>1 @@ -312,20 +283,18 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) // and print using that mi_print_header(out,arg); #if MI_STAT>1 - mi_stats_print_bins(stats->malloc_bins, MI_BIN_HUGE, "bin",out,arg); + mi_stats_print_bins(stats->malloc_bins, MI_BIN_HUGE, "normal",out,arg); #endif #if MI_STAT - mi_stat_print(&stats->malloc_normal, "binned", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg); - // mi_stat_print(&stats->malloc_large, "large", (stats->malloc_large_count.total == 0 ? 1 : -1), out, arg); + mi_stat_print(&stats->malloc_normal, "normal", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg); mi_stat_print(&stats->malloc_huge, "huge", (stats->malloc_huge_count.total == 0 ? 1 : -1), out, arg); mi_stat_count_t total = { 0,0,0 }; mi_stat_count_add_mt(&total, &stats->malloc_normal); - // mi_stat_count_add(&total, &stats->malloc_large); mi_stat_count_add_mt(&total, &stats->malloc_huge); mi_stat_print_ex(&total, "total", 1, out, arg, ""); #endif #if MI_STAT>1 - mi_stat_total_print(&stats->malloc_requested, "malloc req", 1, out, arg); + mi_stat_print_ex(&stats->malloc_requested, "malloc req", 1, out, arg, ""); _mi_fprintf(out, arg, "\n"); #endif mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); @@ -350,7 +319,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_counter_print(&stats->malloc_guarded_count, "guarded", out, arg); mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->page_searches, "searches", out, arg); - _mi_fprintf(out, arg, "%10s: %5i\n", "numa nodes", _mi_os_numa_node_count()); + _mi_fprintf(out, arg, "%10s: %5zu\n", "numa nodes", _mi_os_numa_node_count()); size_t elapsed; size_t user_time; @@ -361,9 +330,9 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) size_t peak_commit; size_t page_faults; mi_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); - _mi_fprintf(out, arg, "%10s: %5zu.%03zu s\n", "elapsed", elapsed/1000, elapsed%1000); - _mi_fprintf(out, arg, "%10s: user: %zu.%03zu s, system: %zu.%03zu s, faults: %zu, rss: ", "process", - user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, page_faults ); + _mi_fprintf(out, arg, "%10s: %5ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); + _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", + user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); if (peak_commit > 0) { _mi_fprintf(out, arg, ", commit: "); @@ -397,10 +366,6 @@ void mi_stats_merge(void) mi_attr_noexcept { mi_stats_merge_from( mi_stats_get_default() ); } -void _mi_stats_merge_thread(mi_tld_t* tld) { - mi_stats_merge_from( &tld->stats ); -} - void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` mi_stats_merge_from(stats); } @@ -504,7 +469,7 @@ static bool mi_heap_buf_expand(mi_heap_buf_t* hbuf) { hbuf->buf[hbuf->size-1] = 0; } if (hbuf->size > SIZE_MAX/2 || !hbuf->can_realloc) return false; - const size_t newsize = (hbuf->size == 0 ? mi_good_size(12*MI_KiB) : 2*hbuf->size); + const size_t newsize = (hbuf->size == 0 ? 
2*MI_KiB : 2*hbuf->size); char* const newbuf = (char*)mi_rezalloc(hbuf->buf, newsize); if (newbuf == NULL) return false; hbuf->buf = newbuf; @@ -531,12 +496,7 @@ static void mi_heap_buf_print_count_bin(mi_heap_buf_t* hbuf, const char* prefix, const size_t binsize = _mi_bin_size(bin); const size_t pagesize = (binsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_SMALL_PAGE_SIZE : (binsize <= MI_MEDIUM_OBJ_SIZE_MAX ? MI_MEDIUM_PAGE_SIZE : - #if MI_LARGE_PAGE_SIZE - (binsize <= MI_LARGE_OBJ_SIZE_MAX ? MI_LARGE_PAGE_SIZE : 0) - #else - 0 - #endif - )); + (binsize <= MI_LARGE_OBJ_SIZE_MAX ? MI_LARGE_PAGE_SIZE : 0))); char buf[128]; _mi_snprintf(buf, 128, "%s{ \"total\": %lld, \"peak\": %lld, \"current\": %lld, \"block_size\": %zu, \"page_size\": %zu }%s\n", prefix, stat->total, stat->peak, stat->current, binsize, pagesize, (add_comma ? "," : "")); buf[127] = 0; @@ -629,7 +589,7 @@ char* mi_stats_get_json(size_t output_size, char* output_buf) mi_attr_noexcept { for (size_t i = 0; i <= MI_BIN_HUGE; i++) { mi_heap_buf_print_count_bin(&hbuf, " ", &stats->page_bins[i], i, i!=MI_BIN_HUGE); } - mi_heap_buf_print(&hbuf, " ]\n"); + mi_heap_buf_print(&hbuf, " ]\n"); mi_heap_buf_print(&hbuf, "}\n"); return hbuf.buf; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c5fff1a6..5905613c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -16,7 +16,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 2.2 CONFIG REQUIRED) +find_package(mimalloc 1.9 CONFIG REQUIRED) message(STATUS "Found mimalloc installed at: ${MIMALLOC_LIBRARY_DIR} (${MIMALLOC_VERSION_DIR})") diff --git a/test/main-override-dep.cpp b/test/main-override-dep.cpp index d89e3fca..e92f6fc4 100644 --- a/test/main-override-dep.cpp +++ b/test/main-override-dep.cpp @@ -1,7 +1,6 @@ // Issue #981: test overriding allocation in a DLL that is compiled independent of mimalloc. // This is imported by the `mimalloc-test-override` project. #include <string> -#include <iostream> #include "main-override-dep.h" std::string TestAllocInDll::GetString() @@ -11,41 +10,6 @@ std::string TestAllocInDll::GetString() const char* t = "test"; memcpy(test, t, 4); std::string r = test; - std::cout << "override-dep: GetString: " << r << "\n"; delete[] test; return r; -} - - -class Static { -private: - void* p; -public: - Static() { - printf("override-dep: static constructor\n"); - p = malloc(64); - return; - } - ~Static() { - free(p); - printf("override-dep: static destructor\n"); - return; - } -}; - -static Static s = Static(); - - -#include <windows.h> - -BOOL WINAPI DllMain(HINSTANCE module, DWORD reason, LPVOID reserved) { - (void)(reserved); - (void)(module); - if (reason==DLL_PROCESS_ATTACH) { - printf("override-dep: dll attach\n"); - } - else if (reason==DLL_PROCESS_DETACH) { - printf("override-dep: dll detach\n"); - } - return TRUE; -} +} \ No newline at end of file diff --git a/test/main-override-static.c b/test/main-override-static.c index 420b8bf7..06d7baa5 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -10,6 +10,7 @@ #include <mimalloc.h> #include <mimalloc-override.h> // redefines malloc etc. 
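The "redefines malloc etc." context line above refers to mimalloc's static override header; a hedged sketch of its effect, assuming the usual macro-based redirection to the mi_ entry points:

#include <stdio.h>
#include <mimalloc.h>
#include <mimalloc-override.h>  // after this, malloc/free map to mi_malloc/mi_free

int main(void) {
  void* p = malloc(128);                       // serviced by mi_malloc(128)
  printf("usable: %zu\n", mi_usable_size(p));  // mimalloc recognizes the block
  free(p);                                     // serviced by mi_free(p)
  return 0;
}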
+static void mi_bins(void); static void double_free1(); static void double_free2(); @@ -23,12 +24,11 @@ static void test_reserved(void); static void negative_stat(void); static void alloc_huge(void); static void test_heap_walk(void); -static void test_heap_arena(void); -static void test_align(void); static void test_canary_leak(void); static void test_manage_os_memory(void); // static void test_large_pages(void); + int main() { mi_version(); mi_stats_reset(); @@ -43,17 +43,15 @@ int main() { // corrupt_free(); // block_overflow1(); // block_overflow2(); - test_canary_leak(); + // test_canary_leak(); // test_aslr(); // invalid_free(); // test_reserved(); // negative_stat(); // test_heap_walk(); // alloc_huge(); - // test_heap_walk(); - // test_heap_arena(); - // test_align(); - + + void* p1 = malloc(78); void* p2 = malloc(24); free(p1); @@ -69,7 +67,7 @@ int main() { free(p1); free(p2); free(s); - + /* now test if override worked by allocating/freeing across the api's*/ //p1 = mi_malloc(32); //free(p1); @@ -84,13 +82,6 @@ int main() { return 0; } -static void test_align() { - void* p = mi_malloc_aligned(256, 256); - if (((uintptr_t)p % 256) != 0) { - fprintf(stderr, "%p is not 256 alignend!\n", p); - } -} - static void invalid_free() { free((void*)0xBADBEEF); realloc((void*)0xBADBEEF,10); @@ -248,20 +239,6 @@ static void test_heap_walk(void) { mi_heap_visit_blocks(heap, true, &test_visit, NULL); } -static void test_heap_arena(void) { - mi_arena_id_t arena_id; - int err = mi_reserve_os_memory_ex(100 * 1024 * 1024, false /* commit */, false /* allow large */, true /* exclusive */, &arena_id); - if (err) abort(); - mi_heap_t* heap = mi_heap_new_in_arena(arena_id); - for (int i = 0; i < 500000; i++) { - void* p = mi_heap_malloc(heap, 1024); - if (p == NULL) { - printf("out of memory after %d kb (expecting about 100_000kb)\n", i); - break; - } - } -} - static void test_canary_leak(void) { char* p = mi_mallocn_tp(char,23); for(int i = 0; i < 23; i++) { diff --git a/test/main-override.cpp b/test/main-override.cpp index 75b409fd..db594acc 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -27,12 +27,9 @@ static void heap_late_free(); // issue #204 static void padding_shrink(); // issue #209 static void various_tests(); static void test_mt_shutdown(); -static void large_alloc(void); // issue #363 static void fail_aslr(); // issue #372 static void tsan_numa_test(); // issue #414 static void strdup_test(); // issue #445 -static void bench_alloc_large(void); // issue #xxx -//static void test_large_migrate(void); // issue #691 static void heap_thread_free_huge(); static void test_std_string(); // issue #697 static void test_thread_local(); // issue #944 @@ -40,7 +37,7 @@ static void test_thread_local(); // issue #944 static void test_mixed1(); // issue #942 static void test_stl_allocators(); -#if _WIN32 +#if x_WIN32 #include "main-override-dep.h" static void test_dep(); // issue #981: test overriding in another DLL #else @@ -58,20 +55,18 @@ int main() { //test_thread_local(); // heap_thread_free_huge(); /* - heap_thread_free_huge(); - heap_thread_free_large(); - heap_no_delete(); - heap_late_free(); - padding_shrink(); - various_tests(); - large_alloc(); - tsan_numa_test(); - strdup_test(); - */ - // test_stl_allocators(); - // test_mt_shutdown(); - // test_large_migrate(); + heap_thread_free_large(); + heap_no_delete(); + heap_late_free(); + padding_shrink(); + tsan_numa_test(); + */ + /* + strdup_test(); + test_stl_allocators(); + test_mt_shutdown(); + */ //fail_aslr(); 
mi_stats_print(NULL); return 0; } @@ -150,12 +145,11 @@ static bool test_stl_allocator1() { struct some_struct { int i; int j; double z; }; -#if _WIN32 +#if x_WIN32 static void test_dep() { TestAllocInDll t; std::string s = t.GetString(); - std::cout << "test_dep GetString: " << s << "\n"; } #endif @@ -364,7 +358,7 @@ static void heap_thread_free_large_worker() { static void heap_thread_free_large() { for (int i = 0; i < 100; i++) { - shared_p = mi_malloc_aligned(2 * 1024 * 1024 + 1, 8); + shared_p = mi_malloc_aligned(2*1024*1024 + 1, 8); auto t1 = std::thread(heap_thread_free_large_worker); t1.join(); } @@ -375,13 +369,14 @@ static void heap_thread_free_huge_worker() { } static void heap_thread_free_huge() { - for (int i = 0; i < 100; i++) { + for (int i = 0; i < 10; i++) { shared_p = mi_malloc(1024 * 1024 * 1024); auto t1 = std::thread(heap_thread_free_huge_worker); t1.join(); } } + static void test_mt_shutdown() { const int threads = 5; @@ -406,18 +401,6 @@ static void test_mt_shutdown() std::cout << "done" << std::endl; } -// issue #363 -using namespace std; - -void large_alloc(void) -{ - char* a = new char[1ull << 25]; - thread th([&] { - delete[] a; - }); - th.join(); -} - // issue #372 static void fail_aslr() { size_t sz = (size_t)(4ULL << 40); // 4TiB @@ -438,36 +421,6 @@ static void tsan_numa_test() { t1.join(); } -// issue #? -#include <chrono> -#include <memory> -#include <random> - -static void bench_alloc_large(void) { - static constexpr int kNumBuffers = 20; - static constexpr size_t kMinBufferSize = 5 * 1024 * 1024; - static constexpr size_t kMaxBufferSize = 25 * 1024 * 1024; - std::unique_ptr<char[]> buffers[kNumBuffers]; - - std::random_device rd; (void)rd; - std::mt19937 gen(42); //rd()); - std::uniform_int_distribution<> size_distribution(kMinBufferSize, kMaxBufferSize); - std::uniform_int_distribution<> buf_number_distribution(0, kNumBuffers - 1); - - static constexpr int kNumIterations = 2000; - const auto start = std::chrono::steady_clock::now(); - for (int i = 0; i < kNumIterations; ++i) { - int buffer_idx = buf_number_distribution(gen); - size_t new_size = size_distribution(gen); - buffers[buffer_idx] = std::make_unique<char[]>(new_size); - } - const auto end = std::chrono::steady_clock::now(); - const auto num_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count(); - const auto us_per_allocation = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / kNumIterations; - std::cout << kNumIterations << " allocations Done in " << num_ms << "ms." 
<< std::endl; - std::cout << "Avg " << us_per_allocation << " us per allocation" << std::endl; -} - class MTest { @@ -494,4 +447,4 @@ void test_thread_local() mi_stats_print(NULL); } return; -} +} \ No newline at end of file diff --git a/test/test-api.c b/test/test-api.c index 6f5d6722..15484544 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -203,11 +203,7 @@ int main(void) { CHECK_BODY("malloc-aligned9") { // test large alignments bool ok = true; void* p[8]; - size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, - #if SIZE_MAX > UINT32_MAX - 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, - #endif - 0 }; + size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, 0 }; for (int i = 0; i < 28 && ok; i++) { int align = (1 << i); for (int j = 0; j < 8 && ok; j++) { diff --git a/test/test-stress.c b/test/test-stress.c index 4f5a3d58..9e041064 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -320,17 +320,11 @@ int main(int argc, char** argv) { // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. srand(0x7feb352d); - - //mi_reserve_os_memory(512ULL << 20, true, true); - - #if !defined(NDEBUG) && !defined(USE_STD_MALLOC) - mi_stats_reset(); - #endif - + // mi_stats_reset(); #ifdef STRESS - test_stress(); + test_stress(); #else - test_leak(); + test_leak(); #endif #ifndef USE_STD_MALLOC @@ -343,7 +337,6 @@ int main(int argc, char** argv) { mi_free(json); } #endif - mi_collect(true); mi_stats_print(NULL); #endif //bench_end_program();
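As a closing usage note for the stats calls exercised by these tests, a hedged sketch: the (output_size, output_buf) signature of mi_stats_get_json matches src/stats.c above, passing (0, NULL) is assumed to allocate a buffer the caller releases with mi_free, and the declaring header is assumed to come with the mimalloc headers this branch installs.

#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  void* p = mi_malloc(1024);
  mi_free(p);
  mi_stats_print(NULL);                     // human-readable stats to the default output
  char* json = mi_stats_get_json(0, NULL);  // 0/NULL: let mimalloc allocate the buffer
  if (json != NULL) {
    fputs(json, stdout);
    mi_free(json);                          // caller frees the returned buffer
  }
  return 0;
}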