diff --git a/.gitattributes b/.gitattributes index 4a42e93d..acdbdbf4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -6,3 +6,5 @@ *.suo binary *.vcproj binary *.patch binary +*.dll binary +*.lib binary diff --git a/CMakeLists.txt b/CMakeLists.txt index c9de8618..8b37e579 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,6 @@ set(mi_sources src/options.c src/init.c) - # Set default build type if (NOT CMAKE_BUILD_TYPE) if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$") @@ -44,6 +43,11 @@ if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") set(MI_SECURE "ON") endif() +if(CMAKE_C_COMPILER_ID MATCHES "MSVC") + set(MI_USE_CXX "ON") +endif() + + # Options if(MI_OVERRIDE MATCHES "ON") message(STATUS "Override standard malloc (MI_OVERRIDE=ON)") @@ -78,7 +82,7 @@ endif() if(MI_USE_CXX MATCHES "ON") message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)") set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX ) - set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) + set_source_files_properties(src/static.c test/test-api.c test/test-stress.c PROPERTIES LANGUAGE CXX ) endif() # Compiler flags @@ -87,6 +91,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") if(CMAKE_C_COMPILER_ID MATCHES "GNU") list(APPEND mi_cflags -Wno-invalid-memory-model) list(APPEND mi_cflags -fvisibility=hidden) + list(APPEND mi_cflags -fbranch-target-load-optimize ) endif() endif() @@ -120,14 +125,28 @@ add_library(mimalloc SHARED ${mi_sources}) set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} NO_SONAME "YES" OUTPUT_NAME ${mi_basename} ) target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT) target_compile_options(mimalloc PRIVATE ${mi_cflags}) +target_link_libraries(mimalloc PUBLIC ${mi_libraries}) target_include_directories(mimalloc PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<INSTALL_INTERFACE:${mi_install_dir}/include> ) -target_link_libraries(mimalloc PUBLIC ${mi_libraries}) +if(WIN32) + # On Windows, link the redirection import library and copy mimalloc-redirect.dll next to the built mimalloc library. 
+ target_link_libraries(mimalloc PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect.lib") + add_custom_command(TARGET mimalloc POST_BUILD + COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect.dll" $<TARGET_FILE_DIR:mimalloc> + COMMENT "Copy mimalloc-redirect.dll to output directory" VERBATIM) +endif() # static library add_library(mimalloc-static STATIC ${mi_sources}) +target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) +target_compile_options(mimalloc-static PRIVATE ${mi_cflags}) +target_link_libraries(mimalloc-static PUBLIC ${mi_libraries}) +target_include_directories(mimalloc-static PUBLIC + $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> + $<INSTALL_INTERFACE:${mi_install_dir}/include> +) if(WIN32) # When building both static and shared libraries on Windows, a static library should use a # different output name to avoid the conflict with the import library of a shared one. @@ -136,19 +155,12 @@ if(WIN32) else() set_target_properties(mimalloc-static PROPERTIES OUTPUT_NAME ${mi_basename}) endif() -target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) -target_compile_options(mimalloc-static PRIVATE ${mi_cflags}) - -target_include_directories(mimalloc-static PUBLIC - $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> - $<INSTALL_INTERFACE:${mi_install_dir}/include> -) -target_link_libraries(mimalloc-static PUBLIC ${mi_libraries}) # install static and shared library, and the include files install(TARGETS mimalloc EXPORT mimalloc DESTINATION ${mi_install_dir} LIBRARY NAMELINK_SKIP) install(TARGETS mimalloc-static EXPORT mimalloc DESTINATION ${mi_install_dir}) install(FILES include/mimalloc.h DESTINATION ${mi_install_dir}/include) +install(FILES include/mimalloc-override.h DESTINATION ${mi_install_dir}/include) install(FILES cmake/mimalloc-config.cmake DESTINATION ${mi_install_dir}/cmake) install(FILES cmake/mimalloc-config-version.cmake DESTINATION ${mi_install_dir}/cmake) install(EXPORT mimalloc DESTINATION ${mi_install_dir}/cmake) diff --git a/bin/mimalloc-redirect.dll b/bin/mimalloc-redirect.dll new file mode 100644 index 00000000..d3427e95 Binary files /dev/null and 
b/bin/mimalloc-redirect.dll differ diff --git a/bin/mimalloc-redirect.lib b/bin/mimalloc-redirect.lib new file mode 100644 index 00000000..cf2f95e4 Binary files /dev/null and b/bin/mimalloc-redirect.lib differ diff --git a/ide/vs2017/mimalloc-override-test.vcxproj b/ide/vs2017/mimalloc-override-test.vcxproj index c50f80dc..7df1e79a 100644 --- a/ide/vs2017/mimalloc-override-test.vcxproj +++ b/ide/vs2017/mimalloc-override-test.vcxproj @@ -90,10 +90,18 @@ true ..\..\include MultiThreadedDebugDLL + false + Default + false Console + kernel32.lib;%(AdditionalDependencies) + + + COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect32.dll $(OutputPath) + @@ -103,14 +111,20 @@ true ..\..\include MultiThreadedDebugDLL + false + Default + false Console - kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) - false + kernel32.lib;%(AdditionalDependencies) + + + COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect.dll $(OutputPath) + @@ -128,7 +142,11 @@ true true Console + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect32.dll $(OutputPath) + @@ -150,15 +168,18 @@ kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect.dll $(OutputPath) + - - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + + diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 5fe9f10e..7d452b55 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -35,7 +35,6 @@ DynamicLibrary false v141 - true DynamicLibrary @@ -46,7 +45,6 @@ DynamicLibrary false v141 - true @@ -70,21 +68,25 @@ 
$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ .dll + mimalloc-override $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ .dll + mimalloc-override $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ .dll + mimalloc-override $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ .dll + mimalloc-override @@ -93,25 +95,20 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS;%(PreprocessorDefinitions); + MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false + Default - - - - - - - - - DllEntry - kernel32.lib;%(AdditionalDependencies) + %(AdditionalDependencies) + Default + DllEntry + false @@ -121,26 +118,27 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS;%(PreprocessorDefinitions); + MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false + Default - - - - - - - - - DllEntry - kernel32.lib;%(AdditionalDependencies) + %(AdditionalDependencies) + Default + DllEntry + false + + COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect.dll $(OutputPath) + + + copy mimalloc-redirect.dll to the output directory + @@ -151,28 +149,23 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS;%(PreprocessorDefinitions);NDEBUG + MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) false MultiThreadedDLL + Default true true - DllEntry - kernel32.lib;%(AdditionalDependencies) + %(AdditionalDependencies) + Default + DllEntry + false - - - - - - - - @@ -183,33 +176,35 @@ true true 
../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;_MBCS;%(PreprocessorDefinitions);NDEBUG + MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode $(IntDir) false MultiThreadedDLL + Default true true - DllEntry - kernel32.lib;%(AdditionalDependencies) + %(AdditionalDependencies) + Default + DllEntry + false - - + COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect.dll $(OutputPath) - - + copy mimalloc-redirect.dll to the output directory + @@ -220,6 +215,12 @@ false + + true + true + true + true + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index d2892c32..df0bf5ed 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -23,6 +23,9 @@ Header Files + + Header Files + @@ -46,9 +49,6 @@ Source Files - - Source Files - Source Files @@ -61,8 +61,14 @@ Source Files + + Source Files + Source Files + + Source Files + - + \ No newline at end of file diff --git a/ide/vs2017/mimalloc-test-stress.vcxproj b/ide/vs2017/mimalloc-test-stress.vcxproj index e8cc5045..b8267d0b 100644 --- a/ide/vs2017/mimalloc-test-stress.vcxproj +++ b/ide/vs2017/mimalloc-test-stress.vcxproj @@ -67,19 +67,19 @@ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + 
$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ diff --git a/ide/vs2017/mimalloc-test.vcxproj b/ide/vs2017/mimalloc-test.vcxproj index 7976af56..27c7bb6e 100644 --- a/ide/vs2017/mimalloc-test.vcxproj +++ b/ide/vs2017/mimalloc-test.vcxproj @@ -67,19 +67,19 @@ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ @@ -144,19 +144,14 @@ Console - - - AssemblyAndSourceCode - AssemblyAndSourceCode - AssemblyAndSourceCode - AssemblyAndSourceCode - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + + diff --git a/ide/vs2017/mimalloc-test.vcxproj.filters b/ide/vs2017/mimalloc-test.vcxproj.filters index 9254f6c0..fca75e1c 100644 --- a/ide/vs2017/mimalloc-test.vcxproj.filters +++ b/ide/vs2017/mimalloc-test.vcxproj.filters @@ -15,7 +15,7 @@ - + Source Files diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index bb1818b0..3e453471 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -70,21 +70,25 @@ $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ .lib + mimalloc-static 
$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ .lib + mimalloc-static $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ .lib + mimalloc-static $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ .lib + mimalloc-static @@ -94,8 +98,9 @@ true ../../include MI_DEBUG=3;%(PreprocessorDefinitions); - Default + CompileAsCpp false + stdcpp17 @@ -112,8 +117,9 @@ true ../../include MI_DEBUG=3;%(PreprocessorDefinitions); - Default + CompileAsCpp false + stdcpp17 @@ -148,7 +154,7 @@ Neither false false - Default + CompileAsCpp true @@ -179,7 +185,7 @@ Neither false false - Default + CompileAsCpp true @@ -211,12 +217,6 @@ true true - - true - true - true - true - true true @@ -243,8 +243,9 @@ - - + + + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 8bdeccf9..28d94e99 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -41,9 +41,6 @@ Source Files - - Source Files - Source Files @@ -64,11 +61,14 @@ Header Files - + Header Files - + + Header Files + + Header Files - + \ No newline at end of file diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 26251878..ad9b3ecf 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -35,10 +35,10 @@ bool _mi_is_main_thread(void); uintptr_t _mi_ptr_cookie(const void* p); uintptr_t _mi_random_shuffle(uintptr_t x); uintptr_t _mi_random_init(uintptr_t seed /* can be zero */); +bool _mi_preloading(); // true while the C runtime is not ready // os.c size_t _mi_os_page_size(void); -uintptr_t _mi_align_up(uintptr_t sz, size_t alignment); void _mi_os_init(void); // called from process init void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data 
void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data @@ -164,6 +164,20 @@ static inline bool mi_mul_overflow(size_t size, size_t count, size_t* total) { #endif } +// Alignment helpers: a power-of-two test and rounding a size upwards to a multiple of `alignment` +static inline bool _mi_is_power_of_two(uintptr_t x) { // note: also yields true for x == 0 + return ((x & (x - 1)) == 0); +} +static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { // `alignment` must be non-zero (the general case divides by it) + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? + return ((sz + mask) & ~mask); + } + else { + return (((sz + mask)/alignment)*alignment); + } +} + // Align a byte size to a size in _machine words_, // i.e. byte size == `wsize*sizeof(void*)`. static inline size_t _mi_wsize_from_size(size_t size) { @@ -293,13 +307,23 @@ static inline bool mi_page_all_used(mi_page_t* page) { static inline bool mi_page_mostly_used(const mi_page_t* page) { if (page==NULL) return true; uint16_t frac = page->reserved / 8U; - return (page->reserved - page->used + page->thread_freed < frac); + return (page->reserved - page->used + page->thread_freed <= frac); } static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { return &((mi_heap_t*)heap)->pages[_mi_bin(size)]; } +static inline uintptr_t mi_page_thread_id(const mi_page_t* page) { + return ((uintptr_t)page->flags.xthread_id << MI_PAGE_FLAGS_BITS); // widen first: the bit-field is narrower than uintptr_t, so shifting it directly could drop the top bits +} + +static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) { + page->flags.value = 0; + page->flags.xthread_id = (thread_id >> MI_PAGE_FLAGS_BITS); + mi_assert(page->flags.value == thread_id); // expects the low MI_PAGE_FLAGS_BITS bits of thread_id to be zero +} + // ------------------------------------------------------------------- // Encoding/Decoding the free list next pointers // ------------------------------------------------------------------- @@ -323,12 +347,23 @@ static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, mi_bl } static inline mi_block_t* mi_block_next(mi_page_t* page, mi_block_t* block) { + #if MI_SECURE return mi_block_nextx(page->cookie,block); + #else + UNUSED(page); 
+ return mi_block_nextx(0, block); + #endif } static inline void mi_block_set_next(mi_page_t* page, mi_block_t* block, mi_block_t* next) { + #if MI_SECURE mi_block_set_nextx(page->cookie,block,next); + #else + UNUSED(page); + mi_block_set_nextx(0, block, next); + #endif } + // ------------------------------------------------------------------- // Getting the thread id should be performant // as it is called in the fast path of `_mi_free`, diff --git a/include/mimalloc-override.h b/include/mimalloc-override.h new file mode 100644 index 00000000..56b41e6b --- /dev/null +++ b/include/mimalloc-override.h @@ -0,0 +1,110 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018,2019 Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_OVERRIDE_H +#define MIMALLOC_OVERRIDE_H + +/* ---------------------------------------------------------------------------- +This header can be used to statically redirect malloc/free and new/delete +to the mimalloc variants. This can be useful if one can include this file in +each source file in a project (but be careful when using external code to +not accidentally mix pointers from different allocators). + +On Windows it can still be good to always try to include this header even +when dynamically overriding since this will give better performance especially +for new/delete. On Unix dynamic overriding already includes all variants so +including this header is not necessary. 
+-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" + +// Standard C allocation +#define malloc(n) mi_malloc(n) +#define calloc(n,c) mi_calloc(n,c) +#define realloc(p,n) mi_realloc(p,n) +#define free(p) mi_free(p) + +#define strdup(s) mi_strdup(s) +#define strndup(s,n) mi_strndup(s,n) // strndup takes the string and a maximum length +#define realpath(f,n) mi_realpath(f,n) + +// Microsoft extensions +#define _expand(p,n) mi_expand(p,n) +#define _msize(p) mi_usable_size(p) +#define _recalloc(p,n,c) mi_recalloc(p,n,c) + +#define _strdup(s) mi_strdup(s) +#define _strndup(s,n) mi_strndup(s,n) +#define _wcsdup(s) ((wchar_t*)mi_wcsdup((const unsigned short*)(s))) // parenthesized so the cast binds before any surrounding operator +#define _mbsdup(s) mi_mbsdup(s) +#define _dupenv_s(b,n,v) mi_dupenv_s(b,n,v) +#define _wdupenv_s(b,n,v) mi_wdupenv_s((unsigned short**)(b),n,(const unsigned short*)(v)) // `b` is an out-parameter: pointer to the buffer pointer + +// Various Posix and Unix variants +#define reallocf(p,n) mi_reallocf(p,n) +#define malloc_size(p) mi_usable_size(p) +#define malloc_usable_size(p) mi_usable_size(p) +#define cfree(p) mi_free(p) + +#define valloc(n) mi_valloc(n) +#define pvalloc(n) mi_pvalloc(n) +#define reallocarray(p,s,n) mi_reallocarray(p,s,n) +#define memalign(a,n) mi_memalign(a,n) +#define aligned_alloc(a,n) mi_aligned_alloc(a,n) +#define posix_memalign(p,a,n) mi_posix_memalign(p,a,n) +#define _posix_memalign(p,a,n) mi_posix_memalign(p,a,n) + +// Microsoft aligned variants +#define _aligned_malloc(n,a) mi_malloc_aligned(n,a) +#define _aligned_realloc(p,n,a) mi_realloc_aligned(p,n,a) +#define _aligned_recalloc(p,s,n,a) mi_aligned_recalloc(p,s,n,a) +#define _aligned_msize(p,a,o) mi_usable_size(p) +#define _aligned_free(p) mi_free(p) +#define _aligned_offset_malloc(n,a,o) mi_malloc_aligned_at(n,a,o) +#define _aligned_offset_realloc(p,n,a,o) mi_realloc_aligned_at(p,n,a,o) +#define _aligned_offset_recalloc(p,s,n,a,o) mi_recalloc_aligned_at(p,s,n,a,o) + + +// ----------------------------------------------------------------- +// With a C++ compiler we can override all the new/delete 
operators +// by defining 'MIMALLOC_DEFINE_NEW_DELETE' in some source file and +// then including this header file. This is not needed when linking +// statically with the mimalloc library, but it can be more performant +// on Windows when using dynamic overriding as well. +// see <https://en.cppreference.com/w/cpp/memory/new/operator_new> +// ----------------------------------------------------------------- +#if defined(__cplusplus) && defined(MIMALLOC_DEFINE_NEW_DELETE) + #include <new> + + void operator delete(void* p) noexcept { mi_free(p); } + void operator delete[](void* p) noexcept { mi_free(p); } + + void* operator new(std::size_t n) noexcept(false) { return mi_new(n); } + void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); } + + void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } + void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } + + #if (__cplusplus >= 201402L || _MSC_VER >= 1916) + void operator delete (void* p, std::size_t n) noexcept { mi_free_size(p,n); } + void operator delete[](void* p, std::size_t n) noexcept { mi_free_size(p,n); } + #endif + + #if (__cplusplus > 201402L || defined(__cpp_aligned_new)) + void operator delete (void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); } + void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); } + void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); } + void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); } + + void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); } + void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); } + void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) 
noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); } + void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); } + #endif +#endif + +#endif // MIMALLOC_OVERRIDE_H diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index d591ff86..4002c12c 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -8,7 +8,6 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_TYPES_H #define MIMALLOC_TYPES_H -#include <stdlib.h> // size_t etc. #include <stddef.h> // ptrdiff_t #include <stdint.h> // uintptr_t, uint16_t, etc @@ -92,19 +91,19 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE) #define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE) -#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/8) // 64kb on 64-bit -#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/8) // 512kb on 64-bit +#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb on 64-bit +#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/4) // 1Mb on 64-bit #define MI_LARGE_WSIZE_MAX (MI_LARGE_SIZE_MAX>>MI_INTPTR_SHIFT) -// Maximum number of size classes. (spaced exponentially in 16.7% increments) -#define MI_BIN_HUGE (64U) - // Minimal alignment necessary. On most platforms 16 bytes are needed // due to SSE registers for example. This must be at least `MI_INTPTR_SIZE` #define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) -#if (MI_LARGE_WSIZE_MAX > 131072) +// Maximum number of size classes. 
(spaced exponentially in 12.5% increments) +#define MI_BIN_HUGE (70U) + +#if (MI_LARGE_WSIZE_MAX > 393216) #error "define more bins" #endif @@ -124,18 +123,29 @@ typedef enum mi_delayed_e { } mi_delayed_t; +// Use the lowest two bits of a thread id for the `in_full` and `has_aligned` flags +// This allows a single test in `mi_free` to check for unlikely cases +// (namely, non-local free, aligned free, or freeing in a full page) +#define MI_PAGE_FLAGS_BITS (2) +#define MI_PAGE_FLAGS_TID_BITS (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS) typedef union mi_page_flags_u { - uint16_t value; + uintptr_t value; struct { - bool has_aligned; - bool in_full; + #ifdef MI_BIG_ENDIAN + uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS; + #endif + uintptr_t in_full : 1; + uintptr_t has_aligned : 1; + #ifndef MI_BIG_ENDIAN + uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS; + #endif }; } mi_page_flags_t; -// Thread free list. -// We use bottom 2 bits of the pointer for mi_delayed_t flags -typedef uintptr_t mi_thread_free_t; +// Thread free list. +// We use the bottom 2 bits of the pointer for mi_delayed_t flags +typedef uintptr_t mi_thread_free_t; // A page contains blocks of one specific size (`block_size`). 
// Each page has three list of free blocks: @@ -163,13 +173,15 @@ typedef struct mi_page_s { bool is_committed:1; // `true` if the page virtual memory is committed // layout like this to optimize access in `mi_malloc` and `mi_free` - mi_page_flags_t flags; uint16_t capacity; // number of blocks committed uint16_t reserved; // number of blocks reserved in memory - + // 16 bits padding mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) + #if MI_SECURE uintptr_t cookie; // random cookie to encode the free lists + #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + mi_page_flags_t flags; // threadid:62 | has_aligned:1 | in_full:1 mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) volatile uintptr_t thread_freed; // at least this number of blocks are in `thread_free` @@ -182,10 +194,10 @@ typedef struct mi_page_s { struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // improve page index calculation -#if MI_INTPTR_SIZE==8 - //void* padding[1]; // 10 words on 64-bit +#if (MI_INTPTR_SIZE==8 && MI_SECURE==0) + void* padding[1]; // 12 words on 64-bit #elif MI_INTPTR_SIZE==4 - void* padding[1]; // 12 words on 32-bit + // void* padding[1]; // 12 words on 32-bit #endif } mi_page_t; @@ -215,7 +227,7 @@ typedef struct mi_segment_s { // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). 
- uintptr_t thread_id; // unique id of the thread owning this segment + volatile uintptr_t thread_id; // unique id of the thread owning this segment mi_page_kind_t page_kind; // kind of pages: small, large, or huge mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages } mi_segment_t; @@ -324,12 +336,12 @@ typedef struct mi_stats_s { mi_stat_count_t pages_abandoned; mi_stat_count_t pages_extended; mi_stat_count_t mmap_calls; - mi_stat_count_t mmap_right_align; - mi_stat_count_t mmap_ensure_aligned; mi_stat_count_t commit_calls; mi_stat_count_t threads; mi_stat_count_t huge; mi_stat_count_t malloc; + mi_stat_count_t segments_cache; + mi_stat_counter_t page_no_retire; mi_stat_counter_t searches; #if MI_STAT>1 mi_stat_count_t normal[MI_BIN_HUGE+1]; diff --git a/include/mimalloc.h b/include/mimalloc.h index 95ba82e5..c6b7b5f8 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -52,8 +52,8 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_attr_alloc_size2(s1,s2) #else #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) - #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) - #define mi_cdecl // leads to warnings... __attribute__((cdecl)) + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #define mi_cdecl // leads to warnings... __attribute__((cdecl)) #endif #else #define mi_decl_thread __thread @@ -62,14 +62,13 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_attr_malloc #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) - #define mi_cdecl + #define mi_cdecl #endif // ------------------------------------------------------ // Includes // ------------------------------------------------------ -#include // size_t, malloc etc. 
#include // bool #include // FILE @@ -197,6 +196,7 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; + // ------------------------------------------------------ // Convenience // ------------------------------------------------------ @@ -245,14 +245,15 @@ mi_decl_export void mi_option_set(mi_option_t option, long value); mi_decl_export void mi_option_set_default(mi_option_t option, long value); -// ---------------------------------------------------------------------------------- -// mi prefixed implementations of various posix, unix, and C++ allocation functions. -// ----------------------------------------------------------------------------------- +// ------------------------------------------------------------------------------------------------------- +// "mi" prefixed implementations of various posix, Unix, Windows, and C++ allocation functions. +// (This can be convenient when providing overrides of these functions as done in `mimalloc-override.h`.) 
+// ------------------------------------------------------------------------------------------------------- -mi_decl_export void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept; mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept; mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept; mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; +mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept; mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; mi_decl_export int mi__posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; @@ -263,6 +264,15 @@ mi_decl_export mi_decl_allocator void* mi_pvalloc(size_t size) mi_attr_noexcept mi_decl_export mi_decl_allocator void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); mi_decl_export mi_decl_allocator void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); +mi_decl_export void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept; +mi_decl_export void* mi_aligned_recalloc(void* p, size_t size, size_t newcount, size_t alignment) mi_attr_noexcept; +mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t size, size_t newcount, size_t alignment, size_t offset) mi_attr_noexcept; + +mi_decl_export unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept; +mi_decl_export unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept; +mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept; +mi_decl_export int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept; + mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept; mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept; mi_decl_export void mi_free_aligned(void* 
p, size_t alignment) mi_attr_noexcept; diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 3ef93c83..2f44f317 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" -#include // memset +#include // memset, memcpy // ------------------------------------------------------ // Aligned Allocation @@ -150,3 +150,14 @@ void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t of void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_aligned(mi_get_default_heap(), p, newsize, alignment); } + +void* mi_aligned_offset_recalloc(void* p, size_t size, size_t newcount, size_t alignment, size_t offset) mi_attr_noexcept { + size_t newsize; + if (mi_mul_overflow(size,newcount,&newsize)) return NULL; + return mi_heap_realloc_zero_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset, true ); +} +void* mi_aligned_recalloc(void* p, size_t size, size_t newcount, size_t alignment) mi_attr_noexcept { + size_t newsize; + if (mi_mul_overflow(size, newcount, &newsize)) return NULL; + return mi_heap_realloc_zero_aligned(mi_get_default_heap(), p, newsize, alignment, true ); +} diff --git a/src/alloc-override-win.c b/src/alloc-override-win.c index f0a5959a..d1d51b9a 100644 --- a/src/alloc-override-win.c +++ b/src/alloc-override-win.c @@ -15,6 +15,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #include #include +#include // getenv +#include // strstr + /* To override the C runtime `malloc` on Windows we need to patch the allocation @@ -98,11 +101,6 @@ static int __cdecl mi_setmaxstdio(int newmax); // Microsoft allocation extensions // ------------------------------------------------------ -static void* mi__expand(void* p, size_t newsize) { - void* res = mi_expand(p, newsize); - if (res == NULL) errno = ENOMEM; - return res; -} typedef size_t mi_nothrow_t; @@ -148,8 +146,6 @@ static size_t mi__msize_term(void* p) { } -// Debug versions, forward to base versions (that get patched) - static void* mi__malloc_dbg(size_t size, int block_type, const char* fname, int line) { UNUSED(block_type); UNUSED(fname); UNUSED(line); return _malloc_base(size); @@ -579,7 +575,7 @@ static void mi_module_resolve(const char* fname, HMODULE mod, int priority) { if (addr != NULL) { // found it! set the address patch->originals[i] = addr; - _mi_trace_message(" override %s at %s!%p (entry %i)\n", patch->name, fname, addr, i); + _mi_trace_message(" found %s at %s!%p (entry %i)\n", patch->name, fname, addr, i); } } } @@ -606,7 +602,6 @@ static bool mi_patches_resolve(void) { int ucrtbase_index = 0; int mimalloc_index = 0; // iterate through the loaded modules - _mi_trace_message("overriding malloc dynamically...\n"); for (int i = 0; i < count; i++) { HMODULE mod = modules[i]; char filename[MAX_PATH] = { 0 }; @@ -680,27 +675,39 @@ __declspec(dllexport) BOOL WINAPI DllEntry(HINSTANCE inst, DWORD reason, LPVOID mi_patches_enable_term(); } // C runtime main - BOOL ok = _DllMainCRTStartup(inst, reason, reserved); + BOOL ok = _DllMainCRTStartup(inst, reason, reserved); if (reason == DLL_PROCESS_ATTACH && ok) { + // initialize at exit lists + mi_initialize_atexit(); + // Now resolve patches ok = mi_patches_resolve(); if (ok) { - // and register our unwind entry (this must be after resolving due to possible delayed DLL initialization from 
GetProcAddress) - mi_fls_unwind_entry = FlsAlloc(&mi_fls_unwind); - if (mi_fls_unwind_entry != FLS_OUT_OF_INDEXES) { - FlsSetValue(mi_fls_unwind_entry, (void*)1); + // check if patching is not disabled + #pragma warning(suppress:4996) + const char* s = getenv("MIMALLOC_DISABLE_OVERRIDE"); + bool enabled = (s == NULL || !(strstr("1;TRUE;YES;ON", s) != NULL)); + if (!enabled) { + _mi_verbose_message("override is disabled\n"); } + else { + // and register our unwind entry (this must be after resolving due to possible delayed DLL initialization from GetProcAddress) + mi_fls_unwind_entry = FlsAlloc(&mi_fls_unwind); + if (mi_fls_unwind_entry != FLS_OUT_OF_INDEXES) { + FlsSetValue(mi_fls_unwind_entry, (void*)1); + } - // register our patch disabler in the global exit list - mi_initialize_atexit(); - if (crt_atexit != NULL) (*crt_atexit)(&mi_patches_atexit); - if (crt_at_quick_exit != NULL) (*crt_at_quick_exit)(&mi_patches_at_quick_exit); + // register our patch disabler in the global exit list + if (crt_atexit != NULL) (*crt_atexit)(&mi_patches_atexit); + if (crt_at_quick_exit != NULL) (*crt_at_quick_exit)(&mi_patches_at_quick_exit); - // and patch ! this also redirects the `atexit` handling for the global exit list - mi_patches_enable(); + // and patch ! this also redirects the `atexit` handling for the global exit list + mi_patches_enable(); + _mi_verbose_message("override is enabled\n"); - // hide internal allocation - mi_stats_reset(); + // hide internal allocation + mi_stats_reset(); + } } } return ok; diff --git a/src/alloc-override.c b/src/alloc-override.c index 5ca88af7..e5eeaab2 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -10,7 +10,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif #if defined(MI_MALLOC_OVERRIDE) && defined(_WIN32) && !(defined(MI_SHARED_LIB) && defined(_DLL)) -#error "It is only possible to override malloc on Windows when building as a DLL (and linking the C runtime as a DLL)" +#error "It is only possible to override "malloc" on Windows when building as a 64-bit DLL (and linking the C runtime as a DLL)" #endif #if defined(MI_MALLOC_OVERRIDE) && !defined(_WIN32) @@ -19,10 +19,6 @@ terms of the MIT license. A copy of the license can be found in the file // Override system malloc // ------------------------------------------------------ -#if defined(_MSC_VER) -#pragma warning(disable:4273) // inconsistent dll linking -#endif - #if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__) // use aliasing to alias the exported function to one of our `mi_` functions #if (defined(__GNUC__) && __GNUC__ >= 9) @@ -62,6 +58,9 @@ terms of the MIT license. A copy of the license can be found in the file MI_INTERPOSE_MI(strdup), MI_INTERPOSE_MI(strndup) }; +#elif defined(_MSC_VER) + // cannot override malloc unless using a dll. + // we just override new/delete which does work in a static library. #else // On all other systems forward to our API void* malloc(size_t size) mi_attr_noexcept MI_FORWARD1(mi_malloc, size); @@ -94,7 +93,7 @@ terms of the MIT license. 
A copy of the license can be found in the file void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); } void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); } - #if (__cplusplus >= 201402L) + #if (__cplusplus >= 201402L || _MSC_VER >= 1916) void operator delete (void* p, std::size_t n) MI_FORWARD02(mi_free_size,p,n); void operator delete[](void* p, std::size_t n) MI_FORWARD02(mi_free_size,p,n); #endif @@ -194,4 +193,5 @@ int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_me #pragma GCC visibility pop #endif -#endif // MI_MALLOC_OVERRIDE & !_WIN32 +#endif // MI_MALLOC_OVERRIDE && !_WIN32 + diff --git a/src/alloc-posix.c b/src/alloc-posix.c index b3185f15..672b73b3 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -18,6 +18,8 @@ terms of the MIT license. A copy of the license can be found in the file // ------------------------------------------------------ #include +#include // memcpy +#include // getenv #ifndef EINVAL #define EINVAL 22 @@ -36,7 +38,9 @@ size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept { } void mi_cfree(void* p) mi_attr_noexcept { - mi_free(p); + if (mi_is_in_heap_region(p)) { + mi_free(p); + } } int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept { @@ -80,3 +84,68 @@ void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { return newp; } +void* mi__expand(void* p, size_t newsize) mi_attr_noexcept { // Microsoft + void* res = mi_expand(p, newsize); + if (res == NULL) errno = ENOMEM; + return res; +} + +void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { // Microsoft + size_t total; + if (mi_mul_overflow(count, size, &total)) return NULL; + return _mi_heap_realloc_zero(mi_get_default_heap(), p, total, true); +} + +unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept { + if (s==NULL) return NULL; + size_t 
len; + for(len = 0; s[len] != 0; len++) { } + size_t size = (len+1)*sizeof(unsigned short); + unsigned short* p = (unsigned short*)mi_malloc(size); + if (p != NULL) { + memcpy(p,s,size); + } + return p; +} + +unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept { + return (unsigned char*)mi_strdup((const char*)s); +} + +int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept { + if (buf==NULL || name==NULL) return EINVAL; + if (size != NULL) *size = 0; + #pragma warning(suppress:4996) + char* p = getenv(name); + if (p==NULL) { + *buf = NULL; + } + else { + *buf = mi_strdup(p); + if (*buf==NULL) return ENOMEM; + if (size != NULL) *size = strlen(p); + } + return 0; +} + +int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept { + if (buf==NULL || name==NULL) return EINVAL; + if (size != NULL) *size = 0; +#if !defined(_WIN32) || (defined(WINAPI_FAMILY) && (WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP)) + // not supported + *buf = NULL; + return EINVAL; +#else + #pragma warning(suppress:4996) + unsigned short* p = (unsigned short*)_wgetenv((const wchar_t*)name); + if (p==NULL) { + *buf = NULL; + } + else { + *buf = mi_wcsdup(p); + if (*buf==NULL) return ENOMEM; + if (size != NULL) *size = wcslen((const wchar_t*)p); + } + return 0; +#endif +} diff --git a/src/alloc.c b/src/alloc.c index da8c69b9..bfb37d19 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -8,7 +8,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc-internal.h" #include "mimalloc-atomic.h" -#include // memset +#include // memset, memcpy, strlen +#include // malloc, exit #define MI_IN_ALLOC_C #include "alloc-override.c" @@ -56,6 +57,7 @@ extern inline void* mi_malloc_small(size_t size) mi_attr_noexcept { return mi_heap_malloc_small(mi_get_default_heap(), size); } + // zero initialized small block void* mi_zalloc_small(size_t size) mi_attr_noexcept { void* p = mi_malloc_small(size); @@ -70,7 +72,7 @@ extern inline void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcep void* p; if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { p = mi_heap_malloc_small(heap, size); - } + } else { p = _mi_malloc_generic(heap, size); } @@ -198,28 +200,31 @@ static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, mi_pag // Free a block void mi_free(void* p) mi_attr_noexcept -{ - // optimize: merge null check with the segment masking (below) - //if (p == NULL) return; - +{ #if (MI_DEBUG>0) if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) { _mi_error_message("trying to free an invalid (unaligned) pointer: %p\n", p); return; } #endif - + const mi_segment_t* const segment = _mi_ptr_segment(p); - if (segment == NULL) return; // checks for (p==NULL) - bool local = (_mi_thread_id() == segment->thread_id); // preload, note: putting the thread_id in the page->flags does not improve performance + if (segment == NULL) return; // checks for (p==NULL) #if (MI_DEBUG>0) + if (mi_unlikely(!mi_is_in_heap_region(p))) { + _mi_warning_message("possibly trying to mi_free a pointer that does not point to a valid heap region: 0x%p\n" + "(this may still be a valid very large allocation (over 64MiB))\n", p); + if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { + _mi_warning_message("(yes, the previous pointer 0x%p was valid after all)\n", p); + } + } if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { _mi_error_message("trying to mi_free 
a pointer that does not point to a valid heap space: %p\n", p); return; } #endif - + mi_page_t* page = _mi_segment_page_of(segment, p); #if (MI_STAT>1) @@ -231,24 +236,18 @@ void mi_free(void* p) mi_attr_noexcept // huge page stat is accounted for in `_mi_page_retire` #endif - // adjust if it might be an un-aligned block - if (mi_likely(page->flags.value==0)) { // note: merging both tests (local | value) does not matter for performance + uintptr_t tid = _mi_thread_id(); + if (mi_likely(tid == page->flags.value)) { + // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_likely(local)) { - // owning thread can free a block directly - mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance - page->local_free = block; - page->used--; - if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); } - } - else { - // use atomic operations for a multi-threaded free - _mi_free_block_mt(page, block); - } + mi_block_set_next(page, block, page->local_free); + page->local_free = block; + page->used--; + if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); } } else { - // aligned blocks, or a full page; use the more generic path - mi_free_generic(segment, page, local, p); + // non-local, aligned blocks, or a full page; use the more generic path + mi_free_generic(segment, page, tid == mi_page_thread_id(page), p); } } @@ -393,12 +392,6 @@ void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_realloc(mi_get_default_heap(),p,newsize); } -void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { - size_t total; - if (mi_mul_overflow(count, size, &total)) return NULL; - return _mi_heap_realloc_zero(mi_get_default_heap(),p,total,true); -} - void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { return mi_heap_reallocn(mi_get_default_heap(),p,count,size); } @@ -467,7 +460,7 @@ char* mi_heap_realpath(mi_heap_t* heap, const char* 
fname, char* resolved_name) } } #else -#include +#include // pathconf static size_t mi_path_max() { static size_t path_max = 0; if (path_max <= 0) { @@ -537,6 +530,7 @@ std_new_handler_t mi_get_new_handler() { return _ZSt15get_new_handlerv(); } #else +// note: on windows we could dynamically link to `?get_new_handler@std@@YAP6AXXZXZ`. std_new_handler_t mi_get_new_handler() { return NULL; } diff --git a/src/heap.c b/src/heap.c index b1c62491..63954b3b 100644 --- a/src/heap.c +++ b/src/heap.c @@ -172,7 +172,7 @@ void mi_collect(bool force) mi_attr_noexcept { ----------------------------------------------------------- */ mi_heap_t* mi_heap_get_default(void) { - mi_thread_init(); + mi_thread_init(); return mi_get_default_heap(); } diff --git a/src/init.c b/src/init.c index f55b7318..f807d74a 100644 --- a/src/init.c +++ b/src/init.c @@ -7,17 +7,21 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" -#include // memcpy +#include // memcpy, memset +#include // atexit // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, {0}, - 0, 0, - NULL, 0, 0, // free, used, cookie + 0, false, false, false, 0, 0, + NULL, // free + #if MI_SECURE + 0, + #endif + 0, {0}, // used, flags NULL, 0, 0, 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==4) - , { NULL } + #if (MI_INTPTR_SIZE==8 && MI_SECURE==0) + , { NULL } #endif }; @@ -30,22 +34,23 @@ const mi_page_t _mi_page_empty = { #define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } #define MI_PAGE_QUEUES_EMPTY \ { QNULL(1), \ - QNULL(1), QNULL(2), QNULL(3), QNULL(4), QNULL(5), QNULL(6), QNULL(7), QNULL(8), \ - QNULL(10), QNULL(12), QNULL(14), QNULL(16), QNULL(20), QNULL(24), QNULL(28), QNULL(32), \ - QNULL(40), QNULL(48), QNULL(56), QNULL(64), QNULL(80), QNULL(96), QNULL(112), QNULL(128), \ - QNULL(160), QNULL(192), QNULL(224), QNULL(256), QNULL(320), QNULL(384), QNULL(448), QNULL(512), \ - 
QNULL(640), QNULL(768), QNULL(896), QNULL(1024), QNULL(1280), QNULL(1536), QNULL(1792), QNULL(2048), \ - QNULL(2560), QNULL(3072), QNULL(3584), QNULL(4096), QNULL(5120), QNULL(6144), QNULL(7168), QNULL(8192), \ - QNULL(10240), QNULL(12288), QNULL(14336), QNULL(16384), QNULL(20480), QNULL(24576), QNULL(28672), QNULL(32768), \ - QNULL(40960), QNULL(49152), QNULL(57344), QNULL(65536), QNULL(81920), QNULL(98304), QNULL(114688), \ - QNULL(MI_LARGE_WSIZE_MAX + 1 /*131072, Huge queue */), \ + QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ + QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \ + QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \ + QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \ + QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \ + QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \ + QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \ + QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \ + QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), /* 69 */ \ + QNULL(MI_LARGE_WSIZE_MAX + 1 /* 393216, Huge queue */), \ QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ } #define MI_STAT_COUNT_NULL() {0,0,0,0} // Empty statistics #if MI_STAT>1 -#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT64(MI_STAT_COUNT_NULL) } +#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) } #else #define MI_STAT_COUNT_END_NULL() #endif @@ -58,7 +63,8 @@ const mi_page_t _mi_page_empty = { 
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), \ + { 0, 0 }, \ { 0, 0 } \ MI_STAT_COUNT_END_NULL() @@ -92,8 +98,8 @@ static mi_tld_t tld_main = { 0, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments - { 0, NULL, NULL, 0, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { 0, NULL, NULL, 0, tld_main_stats }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { @@ -373,6 +379,53 @@ void mi_thread_done(void) mi_attr_noexcept { // -------------------------------------------------------- static void mi_process_done(void); +static bool os_preloading = true; // true until this module is initialized +static bool mi_redirected = false; // true if malloc redirects to mi_malloc + +// Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. 
+bool _mi_preloading() { + return os_preloading; +} + +// Communicate with the redirection module on Windows +#if 0 +#ifdef __cplusplus +extern "C" { +#endif +mi_decl_export void _mi_redirect_init() { + // called on redirection + mi_redirected = true; +} +__declspec(dllimport) bool mi_allocator_init(const char** message); +__declspec(dllimport) void mi_allocator_done(); +#ifdef __cplusplus +} +#endif +#else +static bool mi_allocator_init(const char** message) { + if (message != NULL) *message = NULL; + return true; +} +static void mi_allocator_done() { + // nothing to do +} +#endif + +// Called once by the process loader +static void mi_process_load(void) { + os_preloading = false; + atexit(&mi_process_done); + mi_process_init(); + //mi_stats_reset(); + if (mi_redirected) _mi_verbose_message("malloc is redirected.\n"); + + // show message from the redirector (if present) + const char* msg = NULL; + mi_allocator_init(&msg); + if (msg != NULL) _mi_verbose_message(msg); +} + +// Initialize the process; called by thread_init or the process loader void mi_process_init(void) mi_attr_noexcept { // ensure we are called once if (_mi_process_is_initialized) return; @@ -381,7 +434,7 @@ void mi_process_init(void) mi_attr_noexcept { // when using dynamic linking with interpose. 
mi_heap_t* h = _mi_heap_default; _mi_process_is_initialized = true; - + _mi_heap_main.thread_id = _mi_thread_id(); _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h; @@ -389,15 +442,16 @@ void mi_process_init(void) mi_attr_noexcept { _mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random; #endif _mi_heap_main.random = _mi_random_shuffle(random); + mi_process_setup_auto_thread_done(); + _mi_os_init(); #if (MI_DEBUG) _mi_verbose_message("debug level : %d\n", MI_DEBUG); #endif - atexit(&mi_process_done); - mi_process_setup_auto_thread_done(); - mi_stats_reset(); - _mi_os_init(); + mi_thread_init(); + mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) } +// Called when the process is done (through `at_exit`) static void mi_process_done(void) { // only shutdown if we were initialized if (!_mi_process_is_initialized) return; @@ -413,7 +467,9 @@ static void mi_process_done(void) { mi_option_is_enabled(mi_option_verbose)) { mi_stats_print(NULL); } + mi_allocator_done(); _mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id); + os_preloading = true; // don't call the C runtime anymore } @@ -425,8 +481,8 @@ static void mi_process_done(void) { __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { UNUSED(reserved); UNUSED(inst); - if (reason==DLL_PROCESS_ATTACH) { - mi_process_init(); + if (reason==DLL_PROCESS_ATTACH) { + mi_process_load(); } else if (reason==DLL_THREAD_DETACH) { mi_thread_done(); @@ -437,7 +493,7 @@ static void mi_process_done(void) { #elif defined(__cplusplus) // C++: use static initialization to detect process start static bool _mi_process_init(void) { - mi_process_init(); + mi_process_load(); return (_mi_heap_main.thread_id != 0); } static bool mi_initialized = _mi_process_init(); @@ -445,14 +501,14 @@ static void mi_process_done(void) { #elif defined(__GNUC__) || 
defined(__clang__) // GCC,Clang: use the constructor attribute static void __attribute__((constructor)) _mi_process_init(void) { - mi_process_init(); + mi_process_load(); } #elif defined(_MSC_VER) // MSVC: use data section magic for static libraries // See static int _mi_process_init(void) { - mi_process_init(); + mi_process_load(); return 0; } typedef int(*_crt_cb)(void); @@ -467,5 +523,5 @@ static void mi_process_done(void) { #pragma data_seg() #else -#pragma message("define a way to call mi_process_init/done on your platform") +#pragma message("define a way to call mi_process_load on your platform") #endif diff --git a/src/memory.c b/src/memory.c index 83e90b0d..7f8cfb14 100644 --- a/src/memory.c +++ b/src/memory.c @@ -105,7 +105,8 @@ static size_t mi_good_commit_size(size_t size) { } // Return if a pointer points into a region reserved by us. -bool mi_is_in_heap_region(const void* p) { +bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + if (p==NULL) return false; size_t count = mi_atomic_read(®ions_count); for (size_t i = 0; i < count; i++) { uint8_t* start = (uint8_t*)mi_atomic_read_ptr(®ions[i].start); @@ -114,6 +115,7 @@ bool mi_is_in_heap_region(const void* p) { return false; } + /* ---------------------------------------------------------------------------- Commit from a region -----------------------------------------------------------------------------*/ diff --git a/src/options.c b/src/options.c index bc658ca9..cd7e5da1 100644 --- a/src/options.c +++ b/src/options.c @@ -6,9 +6,11 @@ terms of the MIT license. 
A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" +#include "mimalloc-atomic.h" #include -#include // strcmp +#include // strtol +#include // strncpy, strncat, strlen, strstr #include // toupper #include @@ -16,6 +18,10 @@ int mi_version(void) mi_attr_noexcept { return MI_MALLOC_VERSION; } +#ifdef _WIN32 +#include +#endif + // -------------------------------------------------------- // Options // -------------------------------------------------------- @@ -102,16 +108,37 @@ void mi_option_enable_default(mi_option_t option, bool enable) { // -------------------------------------------------------- // Messages // -------------------------------------------------------- +#define MAX_ERROR_COUNT (10) +static uintptr_t error_count = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings + +// When overriding malloc, we may recurse into mi_vfprintf if an allocation +// inside the C runtime causes another message. +static mi_decl_thread bool recurse = false; // Define our own limited `fprintf` that avoids memory allocation. // We do this using `snprintf` with a limited buffer. static void mi_vfprintf( FILE* out, const char* prefix, const char* fmt, va_list args ) { char buf[256]; if (fmt==NULL) return; + if (_mi_preloading() || recurse) return; + recurse = true; if (out==NULL) out = stdout; vsnprintf(buf,sizeof(buf)-1,fmt,args); - if (prefix != NULL) fputs(prefix,out); - fputs(buf,out); + #ifdef _WIN32 + // on windows with redirection, the C runtime cannot handle locale dependent output + // after the main thread closes so use direct console output. + if (out==stderr) { + if (prefix != NULL) _cputs(prefix); + _cputs(buf); + } + else + #endif + { + if (prefix != NULL) fputs(prefix,out); + fputs(buf,out); + } + recurse = false; + return; } void _mi_fprintf( FILE* out, const char* fmt, ... 
) { @@ -139,6 +166,7 @@ void _mi_verbose_message(const char* fmt, ...) { void _mi_error_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; + if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; va_list args; va_start(args,fmt); mi_vfprintf(stderr, "mimalloc: error: ", fmt, args); @@ -148,6 +176,7 @@ void _mi_error_message(const char* fmt, ...) { void _mi_warning_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; + if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; va_list args; va_start(args,fmt); mi_vfprintf(stderr, "mimalloc: warning: ", fmt, args); @@ -179,28 +208,64 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { dest[dest_size - 1] = 0; } -static void mi_option_init(mi_option_desc_t* desc) { - desc->init = DEFAULTED; - // Read option value from the environment - char buf[32]; - mi_strlcpy(buf, "mimalloc_", sizeof(buf)); - mi_strlcat(buf, desc->name, sizeof(buf)); - #pragma warning(suppress:4996) - char* s = getenv(buf); - if (s == NULL) { - size_t buf_size = strlen(buf); - for (size_t i = 0; i < buf_size; i++) { - buf[i] = toupper(buf[i]); +#if defined _WIN32 +// On Windows use GetEnvironmentVariable instead of getenv to work +// reliably even when this is invoked before the C runtime is initialized. +// i.e. when `_mi_preloading() == true`. 
+#include +static bool mi_getenv(const char* name, char* result, size_t result_size) { + result[0] = 0; + bool ok = (GetEnvironmentVariableA(name, result, (DWORD)result_size) > 0); + if (!ok) { + char buf[64+1]; + size_t len = strlen(name); + if (len >= sizeof(buf)) len = sizeof(buf) - 1; + for (size_t i = 0; i < len; i++) { + buf[i] = toupper(name[i]); } + buf[len] = 0; + ok = (GetEnvironmentVariableA(name, result, (DWORD)result_size) > 0); + } + return ok; +} +#else +static bool mi_getenv(const char* name, char* result, size_t result_size) { + #pragma warning(suppress:4996) + const char* s = getenv(name); + if (s == NULL) { + char buf[64+1]; + size_t len = strlen(name); + if (len >= sizeof(buf)) len = sizeof(buf) - 1; + for (size_t i = 0; i < len; i++) { + buf[i] = toupper(name[i]); + } + buf[len] = 0; #pragma warning(suppress:4996) s = getenv(buf); } - if (s != NULL) { - mi_strlcpy(buf, s, sizeof(buf)); - size_t buf_size = strlen(buf); // TODO: use strnlen? - for (size_t i = 0; i < buf_size; i++) { - buf[i] = toupper(buf[i]); + if (s != NULL && strlen(s) < result_size) { + mi_strlcpy(result, s, result_size); + return true; + } + else { + return false; + } +} +#endif +static void mi_option_init(mi_option_desc_t* desc) { + desc->init = DEFAULTED; + // Read option value from the environment + char buf[64+1]; + mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + mi_strlcat(buf, desc->name, sizeof(buf)); + char s[64+1]; + if (mi_getenv(buf, s, sizeof(s))) { + size_t len = strlen(s); + if (len >= sizeof(buf)) len = sizeof(buf) - 1; + for (size_t i = 0; i < len; i++) { + buf[i] = toupper(s[i]); } + buf[len] = 0; if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) { desc->value = 1; desc->init = INITIALIZED; diff --git a/src/os.c b/src/os.c index d7fb38f9..b7499796 100644 --- a/src/os.c +++ b/src/os.c @@ -10,8 +10,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" +#include "mimalloc-atomic.h" -#include // memset +#include // strerror #include #if defined(_WIN32) @@ -33,13 +34,6 @@ terms of the MIT license. A copy of the license can be found in the file ----------------------------------------------------------- */ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { - uintptr_t x = (sz / alignment) * alignment; - if (x < sz) x += alignment; - if (x < sz) return 0; // overflow - return x; -} - static void* mi_align_up_ptr(void* p, size_t alignment) { return (void*)_mi_align_up((uintptr_t)p, alignment); } @@ -205,20 +199,21 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment } static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags) { - static size_t large_page_try_ok = 0; + static volatile uintptr_t large_page_try_ok = 0; void* p = NULL; if (use_large_os_page(size, try_alignment)) { - if (large_page_try_ok > 0) { + uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); + if (try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. - large_page_try_ok--; + mi_atomic_compare_exchange(&large_page_try_ok, try_ok - 1, try_ok); } else { // large OS pages must always reserve and commit. p = mi_win_virtual_allocx(addr, size, try_alignment, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE | flags); // fall back to non-large page allocation on error (`p == NULL`). 
if (p == NULL) { - large_page_try_ok = 10; // on error, don't try again for the next N allocations + mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations } } } @@ -242,13 +237,30 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { return (void*) aligned_base; } #else +static void* mi_unix_mmapx(size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { + void* p = NULL; + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + // on 64-bit systems, use a special area for 4MiB aligned allocations + static volatile intptr_t aligned_base = ((intptr_t)1 << 42); // starting at 4TiB + if (try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE)==0 && (aligned_base%try_alignment)==0) { + intptr_t hint = mi_atomic_add(&aligned_base,size) - size; + p = mmap((void*)hint,size,protect_flags,flags,fd,0); + if (p==MAP_FAILED) p = NULL; // fall back to regular mmap + } + #endif + if (p==NULL) { + p = mmap(NULL,size,protect_flags,flags,fd,0); + } + return p; +} + static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) { void* p = NULL; #if !defined(MAP_ANONYMOUS) #define MAP_ANONYMOUS MAP_ANON #endif int flags = MAP_PRIVATE | MAP_ANONYMOUS; - int gfd = -1; + int fd = -1; #if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { size_t n = _mi_bsr(try_alignment); @@ -261,14 +273,12 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD #endif #if defined(VM_MAKE_TAG) - // tracking anonymous page with a specific ID - // all up to 98 are taken officially but LLVM - // sanitizers had taken 99 - gfd = VM_MAKE_TAG(100); + // darwin: tracking anonymous page with a specific ID all up to 98 are taken officially but LLVM sanitizers had taken 99 + fd = VM_MAKE_TAG(100); #endif if (large_os_page_size > 0 && use_large_os_page(size, try_alignment)) { int lflags = flags; - int fd = -1; + int lfd = fd; #ifdef 
MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; #endif @@ -279,18 +289,18 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) lflags |= MAP_HUGE_2MB; #endif #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB - fd = VM_FLAGS_SUPERPAGE_SIZE_2MB | gfd; + lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; #endif if (lflags != flags) { // try large page allocation // TODO: if always failing due to permissions or no huge pages, try to avoid repeatedly trying? // Should we check this in _mi_os_init? (as on Windows) - p = mmap(NULL, size, protect_flags, lflags, fd, 0); + p = mi_unix_mmapx(size, try_alignment, protect_flags, lflags, lfd); if (p == MAP_FAILED) p = NULL; // fall back to regular mmap if large is exhausted or no permission } } if (p == NULL) { - p = mmap(NULL, size, protect_flags, flags, gfd, 0); + p = mi_unix_mmapx(size, try_alignment, protect_flags, flags, fd); if (p == MAP_FAILED) p = NULL; } return p; @@ -446,7 +456,7 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } -// Commit/Decommit memory. +// Commit/Decommit memory. // Usuelly commit is aligned liberal, while decommit is aligned conservative. // (but not for the reset version where we want commit to be conservative as well) static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, mi_stats_t* stats) { @@ -478,7 +488,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ err = mprotect(start, csize, (commit ? 
(PROT_READ | PROT_WRITE) : PROT_NONE)); #endif if (err != 0) { - _mi_warning_message("commit/decommit error: start: 0x%8p, csize: 0x%8zux, err: %i\n", start, csize, err); + _mi_warning_message("commit/decommit error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); } mi_assert_internal(err == 0); return (err == 0); @@ -510,8 +520,10 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! - #if MI_DEBUG>1 - memset(start, 0, csize); // pretend it is eagerly reset + #if (MI_DEBUG>1) + if (!mi_option_is_enabled(mi_option_secure)) { + memset(start, 0, csize); // pretend it is eagerly reset + } #endif #if defined(_WIN32) @@ -526,7 +538,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); if (p != start) return false; - } + } #else #if defined(MADV_FREE) static int advice = MADV_FREE; @@ -542,7 +554,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) int err = madvise(start, csize, MADV_DONTNEED); #endif if (err != 0) { - _mi_warning_message("madvise reset error: start: 0x%8p, csize: 0x%8zux, errno: %i\n", start, csize, errno); + _mi_warning_message("madvise reset error: start: 0x%p, csize: 0x%x, errno: %i\n", start, csize, errno); } //mi_assert(err == 0); if (err != 0) return false; @@ -591,7 +603,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { err = mprotect(start, csize, protect ? 
PROT_NONE : (PROT_READ | PROT_WRITE)); #endif if (err != 0) { - _mi_warning_message("mprotect error: start: 0x%8p, csize: 0x%8zux, err: %i\n", start, csize, err); + _mi_warning_message("mprotect error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); } return (err == 0); } diff --git a/src/page-queue.c b/src/page-queue.c index fd388113..a386f8a1 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -97,7 +97,7 @@ uint8_t _mi_bsr(uintptr_t x) { // Returns MI_BIN_HUGE if the size is too large. // We use `wsize` for the size in "machine word sizes", // i.e. byte size == `wsize*sizeof(void*)`. -inline uint8_t _mi_bin(size_t size) { +extern inline uint8_t _mi_bin(size_t size) { size_t wsize = _mi_wsize_from_size(size); uint8_t bin; if (wsize <= 1) { @@ -120,13 +120,13 @@ inline uint8_t _mi_bin(size_t size) { bin = MI_BIN_HUGE; } else { - #if defined(MI_ALIGN4W) + #if defined(MI_ALIGN4W) if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes #endif wsize--; // find the highest bit uint8_t b = mi_bsr32((uint32_t)wsize); - // and use the top 3 bits to determine the bin (~16% worst internal fragmentation). + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). // - adjust with 3 because we use do not round the first 8 sizes // which each get an exact bin bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; diff --git a/src/page.c b/src/page.c index 60b3fc09..c644a5b3 100644 --- a/src/page.c +++ b/src/page.c @@ -15,8 +15,6 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc-internal.h" #include "mimalloc-atomic.h" -#include // memset, memcpy - /* ----------------------------------------------------------- Definition of page queues for each block size ----------------------------------------------------------- */ @@ -73,10 +71,11 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_assert_internal(page->block_size > 0); mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); - + mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL)); + mi_assert_internal(segment->thread_id==0 || segment->thread_id == mi_page_thread_id(page)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -95,7 +94,9 @@ static bool mi_page_is_valid_init(mi_page_t* page) { bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); + #if MI_SECURE mi_assert_internal(page->cookie != 0); + #endif if (page->heap!=NULL) { mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id); @@ -121,7 +122,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) { else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) { mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. 
continue; // and try again - } + } } while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal !mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); @@ -216,7 +217,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os); if (page == NULL) return NULL; mi_page_init(heap, page, block_size, &heap->tld->stats); - mi_heap_stat_increase( heap, pages, 1); + _mi_stat_increase( &heap->tld->stats.pages, 1); mi_page_queue_push(heap, pq, page); mi_assert_expensive(_mi_page_is_valid(page)); return page; @@ -260,7 +261,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* next = mi_block_nextx(heap->cookie,block); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { - // we might already start delayed freeing while another thread has not yet + // we might already start delayed freeing while another thread has not yet // reset the delayed_freeing flag; in that case delay it further by reinserting. mi_block_t* dfree; do { @@ -352,7 +353,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { // account for huge pages here if (page->block_size > MI_LARGE_SIZE_MAX) { - mi_heap_stat_decrease(page->heap, huge, page->block_size); + _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); } // remove from the page list @@ -384,8 +385,9 @@ void _mi_page_retire(mi_page_t* page) { // is the only page left with free blocks. It is not clear // how to check this efficiently though... for now we just check // if its neighbours are almost fully used. 
- if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { + if (mi_likely(page->block_size <= MI_MEDIUM_SIZE_MAX)) { if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { + _mi_stat_counter_increase(&_mi_stats_main.page_no_retire,1); return; // dont't retire after all } } @@ -404,7 +406,60 @@ void _mi_page_retire(mi_page_t* page) { #define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) #define MI_MIN_SLICES (2) -static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) +static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) { + UNUSED(stats); + mi_assert_internal(page->free == NULL); + mi_assert_internal(page->local_free == NULL); + mi_assert_internal(page->capacity + extend <= page->reserved); + void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL); + size_t bsize = page->block_size; + + // initialize a randomized free list + // set up `slice_count` slices to alternate between + size_t shift = MI_MAX_SLICE_SHIFT; + while ((extend >> shift) == 0) { + shift--; + } + size_t slice_count = (size_t)1U << shift; + size_t slice_extend = extend / slice_count; + mi_assert_internal(slice_extend >= 1); + mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice + size_t counts[MI_MAX_SLICES]; // available objects in the slice + for (size_t i = 0; i < slice_count; i++) { + blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend); + counts[i] = slice_extend; + } + counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) 
+ + // and initialize the free list by randomly threading through them + // set up first element + size_t current = _mi_heap_random(heap) % slice_count; + counts[current]--; + page->free = blocks[current]; + // and iterate through the rest + uintptr_t rnd = heap->random; + for (size_t i = 1; i < extend; i++) { + // call random_shuffle only every INTPTR_SIZE rounds + size_t round = i%MI_INTPTR_SIZE; + if (round == 0) rnd = _mi_random_shuffle(rnd); + // select a random next slice index + size_t next = ((rnd >> 8*round) & (slice_count-1)); + while (counts[next]==0) { // ensure it still has space + next++; + if (next==slice_count) next = 0; + } + // and link the current block to it + counts[next]--; + mi_block_t* block = blocks[current]; + blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block + mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next` + current = next; + } + mi_block_set_next(page, blocks[current], NULL); // end of the list + heap->random = _mi_random_shuffle(rnd); +} + +static void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats) { UNUSED(stats); mi_assert_internal(page->free == NULL); @@ -413,66 +468,17 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); size_t bsize = page->block_size; mi_block_t* start = mi_page_block_at(page, page_area, page->capacity); - if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) { - // initialize a sequential free list - mi_block_t* end = mi_page_block_at(page, page_area, page->capacity + extend - 1); - mi_block_t* block = start; - for (size_t i = 0; i < extend; i++) { - mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); - mi_block_set_next(page,block,next); - block = next; - } - mi_block_set_next(page, end, NULL); - page->free = start; - } - else { - // initialize a randomized free list - 
// set up `slice_count` slices to alternate between - size_t shift = MI_MAX_SLICE_SHIFT; - while ((extend >> shift) == 0) { - shift--; - } - size_t slice_count = (size_t)1U << shift; - size_t slice_extend = extend / slice_count; - mi_assert_internal(slice_extend >= 1); - mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice - size_t counts[MI_MAX_SLICES]; // available objects in the slice - for (size_t i = 0; i < slice_count; i++) { - blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend); - counts[i] = slice_extend; - } - counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) - // and initialize the free list by randomly threading through them - // set up first element - size_t current = _mi_heap_random(heap) % slice_count; - counts[current]--; - page->free = blocks[current]; - // and iterate through the rest - uintptr_t rnd = heap->random; - for (size_t i = 1; i < extend; i++) { - // call random_shuffle only every INTPTR_SIZE rounds - size_t round = i%MI_INTPTR_SIZE; - if (round == 0) rnd = _mi_random_shuffle(rnd); - // select a random next slice index - size_t next = ((rnd >> 8*round) & (slice_count-1)); - while (counts[next]==0) { // ensure it still has space - next++; - if (next==slice_count) next = 0; - } - // and link the current block to it - counts[next]--; - mi_block_t* block = blocks[current]; - blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block - mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next` - current = next; - } - mi_block_set_next( page, blocks[current], NULL); // end of the list - heap->random = _mi_random_shuffle(rnd); + // initialize a sequential free list + mi_block_t* last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* block = start; + while(block <= last) { + mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); + 
mi_block_set_next(page,block,next); + block = next; } - // enable the new free list - page->capacity += (uint16_t)extend; - _mi_stat_increase(&stats->page_committed, extend * page->block_size); + mi_block_set_next(page, last, NULL); + page->free = start; } /* ----------------------------------------------------------- @@ -500,7 +506,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st if (page->capacity >= page->reserved) return; size_t page_size; - _mi_page_start(_mi_page_segment(page), page, &page_size); + _mi_page_start(_mi_page_segment(page), page, &page_size); _mi_stat_increase(&stats->pages_extended, 1); // calculate the extend count @@ -518,7 +524,15 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st mi_assert_internal(extend < (1UL<<16)); // and append the extend the free list - mi_page_free_list_extend(heap, page, extend, stats ); + if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) { + mi_page_free_list_extend(page, extend, stats ); + } + else { + mi_page_free_list_extend_secure(heap, page, extend, stats); + } + // enable the new free list + page->capacity += (uint16_t)extend; + _mi_stat_increase(&stats->page_committed, extend * page->block_size); mi_assert_expensive(mi_page_is_valid_init(page)); } @@ -535,7 +549,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi page->block_size = block_size; mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); + #if MI_SECURE page->cookie = _mi_heap_random(heap) | 1; + #endif mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); @@ -545,7 +561,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL); mi_assert_internal(page->flags.has_aligned == false); + #if MI_SECURE mi_assert_internal(page->cookie != 0); + 
#endif mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list @@ -684,8 +702,8 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { if (page != NULL) { mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(page->block_size == block_size); - mi_heap_stat_increase( heap, huge, block_size); - } + _mi_stat_increase( &heap->tld->stats.huge, block_size); + } return page; } @@ -704,10 +722,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept // call potential deferred free routines _mi_deferred_free(heap, false); - + // free delayed frees from other threads _mi_heap_delayed_free(heap); - + // huge allocation? mi_page_t* page; if (mi_unlikely(size > MI_LARGE_SIZE_MAX)) { @@ -729,11 +747,4 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept // and try again, this time succeeding! (i.e. this should never recurse) return _mi_page_malloc(heap, page, size); - /* - if (page->used == page->reserved) { - // needed for huge pages to free reliably from other threads. 
- mi_page_to_full(page,mi_page_queue_of(page)); - } - return p; - */ } diff --git a/src/segment.c b/src/segment.c index 7f7bedd7..736345bf 100644 --- a/src/segment.c +++ b/src/segment.c @@ -226,6 +226,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) { + segment->thread_id = 0; mi_segments_track_size(-((long)segment_size),tld); if (mi_option_is_enabled(mi_option_secure)) { _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set @@ -248,17 +249,19 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t tld->cache = segment->next; segment->next = NULL; mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); + _mi_stat_decrease(&tld->stats->segments_cache, 1); return segment; } static bool mi_segment_cache_full(mi_segments_tld_t* tld) { - if (tld->cache_count < MI_SEGMENT_CACHE_MAX && - tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { // always allow 1 element cache + if (tld->cache_count < MI_SEGMENT_CACHE_MAX + && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) + ) { // always allow 1 element cache return false; } // take the opportunity to reduce the segment cache if it is too large (now) // TODO: this never happens as we check against peak usage, should we use current usage instead? 
- while (tld->cache_count > (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { + while (tld->cache_count > MI_SEGMENT_CACHE_MAX ) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { mi_segment_t* segment = mi_segment_cache_pop(0,tld); mi_assert_internal(segment != NULL); if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld); @@ -269,7 +272,9 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) { static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); mi_assert_internal(segment->next == NULL); - if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) return false; + if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { + return false; + } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); if (mi_option_is_enabled(mi_option_cache_reset)) { _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); @@ -277,6 +282,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) segment->next = tld->cache; tld->cache = segment; tld->cache_count++; + _mi_stat_increase(&tld->stats->segments_cache,1); return true; } @@ -318,7 +324,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM); + bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { @@ -407,8 +413,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); mi_assert(segment->next == NULL); mi_assert(segment->prev == NULL); - _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - segment->thread_id = 0; + _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); // update reset memory statistics /* @@ -613,6 +618,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen } else { // otherwise reclaim it + mi_page_init_flags(page,segment->thread_id); _mi_page_reclaim(heap,page); } } @@ -643,6 +649,7 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl mi_assert_internal(mi_segment_has_free(segment)); mi_page_t* page = mi_segment_find_free(segment, tld->stats); page->segment_in_use = true; + mi_page_init_flags(page,segment->thread_id); segment->used++; mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity) { @@ -682,6 +689,7 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_ segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; + mi_page_init_flags(page,segment->thread_id); return page; } @@ -693,22 +701,27 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; + mi_page_init_flags(page,segment->thread_id); return page; } /* 
----------------------------------------------------------- Page allocation and free ----------------------------------------------------------- */ +static bool mi_is_good_fit(size_t bsize, size_t size) { + // good fit if no more than 25% wasted + return (bsize > 0 && size > 0 && bsize < size && (size - (size % bsize)) < (size/4)); +} mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (block_size <= (MI_SMALL_PAGE_SIZE/16)*3) { + if (block_size <= MI_SMALL_SIZE_MAX || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) { page = mi_segment_small_page_alloc(tld,os_tld); } - else if (block_size <= (MI_MEDIUM_PAGE_SIZE/16)*3) { + else if (block_size <= MI_MEDIUM_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) { page = mi_segment_medium_page_alloc(tld, os_tld); } - else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) { + else if (block_size < MI_LARGE_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) { page = mi_segment_large_page_alloc(tld, os_tld); } else { diff --git a/src/stats.c b/src/stats.c index 2b15bf9e..8725e48c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -99,14 +99,14 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1); mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1); mi_stat_add(&stats->mmap_calls, &src->mmap_calls, 1); - mi_stat_add(&stats->mmap_ensure_aligned, &src->mmap_ensure_aligned, 1); - mi_stat_add(&stats->mmap_right_align, &src->mmap_right_align, 1); mi_stat_add(&stats->commit_calls, &src->commit_calls, 1); mi_stat_add(&stats->threads, &src->threads, 1); mi_stat_add(&stats->pages_extended, &src->pages_extended, 1); mi_stat_add(&stats->malloc, &src->malloc, 1); + mi_stat_add(&stats->segments_cache, &src->segments_cache, 1); mi_stat_add(&stats->huge, &src->huge, 1); + mi_stat_counter_add(&stats->page_no_retire, 
&src->page_no_retire, 1); mi_stat_counter_add(&stats->searches, &src->searches, 1); #if MI_STAT>1 for (size_t i = 0; i <= MI_BIN_HUGE; i++) { @@ -172,10 +172,15 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t } static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, FILE* out ) { - double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count); - _mi_fprintf(out,"%10s: %7.1f avg\n", msg, avg); + _mi_fprintf(out, "%10s:", msg); + mi_print_amount(stat->total, -1, out); + _mi_fprintf(out, "\n"); } +static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, FILE* out) { + double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count); + _mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg); +} static void mi_print_header( FILE* out ) { @@ -229,15 +234,15 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n mi_stat_print(&stats->page_committed, "touched", 1, out); mi_stat_print(&stats->segments, "segments", -1, out); mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out); + mi_stat_print(&stats->segments_cache, "-cached", -1, out); mi_stat_print(&stats->pages, "pages", -1, out); mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out); mi_stat_print(&stats->pages_extended, "-extended", 0, out); + mi_stat_counter_print(&stats->page_no_retire, "-noretire", out); mi_stat_print(&stats->mmap_calls, "mmaps", 0, out); - mi_stat_print(&stats->mmap_right_align, "mmap fast", 0, out); - mi_stat_print(&stats->mmap_ensure_aligned, "mmap slow", 0, out); mi_stat_print(&stats->commit_calls, "commits", 0, out); mi_stat_print(&stats->threads, "threads", 0, out); - mi_stat_counter_print(&stats->searches, "searches", out); + mi_stat_counter_print_avg(&stats->searches, "searches", out); if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 
42d4a2f4..8bf36521 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -14,24 +14,33 @@ endif() # Import mimalloc (if installed) find_package(mimalloc 1.0 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") -message(STATUS "${MIMALLOC_INCLUDE_DIR}") - -# Tests +# overriding with a dynamic library add_executable(dynamic-override main-override.c) target_link_libraries(dynamic-override PUBLIC mimalloc) add_executable(dynamic-override-cxx main-override.cpp) target_link_libraries(dynamic-override-cxx PUBLIC mimalloc) -# with a static library + +# overriding with a static object file works reliable as the symbols in the +# object file have priority over those in library files +add_executable(static-override-obj main-override.c ${MIMALLOC_TARGET_DIR}/mimalloc.o) +target_include_directories(static-override-obj PUBLIC ${MIMALLOC_TARGET_DIR}/include) +target_link_libraries(static-override-obj PUBLIC pthread) + + +# overriding with a static library works too if using the `mimalloc-override.h` +# header to redefine malloc/free. 
(the library already overrides new/delete) +add_executable(static-override-static main-override-static.c) +target_link_libraries(static-override-static PUBLIC mimalloc-static) + + +# overriding with a static library: this may not work if the library is linked too late +# on the command line after the C runtime library; but we cannot control that well in CMake add_executable(static-override main-override.c) target_link_libraries(static-override PUBLIC mimalloc-static) add_executable(static-override-cxx main-override.cpp) target_link_libraries(static-override-cxx PUBLIC mimalloc-static) - -# and with a static object file -add_executable(static-override-obj main-override.c ${MIMALLOC_TARGET_DIR}/mimalloc.o) -target_include_directories(static-override-obj PUBLIC ${MIMALLOC_TARGET_DIR}/include) -target_link_libraries(static-override-obj PUBLIC pthread) diff --git a/test/main-override-static.c b/test/main-override-static.c new file mode 100644 index 00000000..94891cc3 --- /dev/null +++ b/test/main-override-static.c @@ -0,0 +1,32 @@ +#include +#include +#include +#include + +#include +#include // redefines malloc etc. 
+ + +int main() { + mi_version(); + void* p1 = malloc(78); + void* p2 = malloc(24); + free(p1); + p1 = malloc(8); + //char* s = strdup("hello\n"); + free(p2); + p2 = malloc(16); + p1 = realloc(p1, 32); + free(p1); + free(p2); + //free(s); + //mi_collect(true); + + /* now test if override worked by allocating/freeing across the api's*/ + //p1 = mi_malloc(32); + //free(p1); + //p2 = malloc(32); + //mi_free(p2); + mi_stats_print(NULL); + return 0; +} diff --git a/test/main-override.c b/test/main-override.c index 836ea58d..1bec1179 100644 --- a/test/main-override.c +++ b/test/main-override.c @@ -5,28 +5,26 @@ #include - int main() { - mi_stats_reset(); + mi_version(); // ensure mimalloc library is linked void* p1 = malloc(78); void* p2 = malloc(24); free(p1); p1 = malloc(8); - char* s = strdup("hello\n"); + //char* s = strdup("hello\n"); free(p2); p2 = malloc(16); p1 = realloc(p1, 32); free(p1); free(p2); - free(s); - mi_collect(true); + //free(s); + //mi_collect(true); /* now test if override worked by allocating/freeing across the api's*/ - p1 = mi_malloc(32); - free(p1); - p2 = malloc(32); - mi_free(p2); - + //p1 = mi_malloc(32); + //free(p1); + //p2 = malloc(32); + //mi_free(p2); mi_stats_print(NULL); return 0; } diff --git a/test/main-override.cpp b/test/main-override.cpp index 3f2bc960..58d06c6a 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -4,7 +4,6 @@ #include #include - #include static void* p = malloc(8); @@ -22,29 +21,25 @@ public: ~Test() { } }; + int main() { - mi_stats_reset(); + mi_stats_reset(); // ignore earlier allocations atexit(free_p); void* p1 = malloc(78); - void* p2 = malloc(24); + void* p2 = mi_malloc_aligned(24,16); // 24 bytes, 16-byte aligned (size first, alignment second) free(p1); p1 = malloc(8); char* s = mi_strdup("hello\n"); - free(p2); + mi_free(p2); p2 = malloc(16); p1 = realloc(p1, 32); free(p1); free(p2); - free(s); + mi_free(s); Test* t = new Test(42); delete t; t = new (std::nothrow) Test(42); - delete t; - int err = mi_posix_memalign(&p1,32,60); - if (!err) free(p1); 
- free(p); - mi_collect(true); - mi_stats_print(NULL); // MIMALLOC_VERBOSE env is set to 2 + delete t; return 0; } diff --git a/test/test-stress.c b/test/test-stress.c index 4e4d9c0d..511679ac 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -1,42 +1,71 @@ /* ---------------------------------------------------------------------------- Copyright (c) 2018,2019 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. +terms of the MIT license. -----------------------------------------------------------------------------*/ /* This is a stress test for the allocator, using multiple threads and transferring objects between threads. This is not a typical workload - but uses a random size distribution. Do not use this test as a benchmark! - Note: pthreads uses mimalloc to allocate stacks and thus not all - memory is freed at the end. (usually the 320 byte chunks). + but uses a random linear size distribution. Do not use this test as a benchmark! 
*/ #include #include +#include +#include #include -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include + +// argument defaults +static int THREADS = 32; // more repeatable if THREADS <= #processors +static int N = 10; // scaling factor + +// static int THREADS = 8; // more repeatable if THREADS <= #processors +// static int N = 100; // scaling factor -#define N (10) // scaling factor -#define THREADS (32) #define TRANSFERS (1000) static volatile void* transfer[TRANSFERS]; -#if (MI_INTPTR_SIZE==8) +#if (UINTPTR_MAX != UINT32_MAX) const uintptr_t cookie = 0xbf58476d1ce4e5b9UL; #else const uintptr_t cookie = 0x1ce4e5b9UL; #endif +static void* atomic_exchange_ptr(volatile void** p, void* newval); -static void* alloc_items(size_t items) { - if ((rand()%100) == 0) items *= 100; // 1% huge objects; +typedef uintptr_t* random_t; + +static uintptr_t pick(random_t r) { + uintptr_t x = *r; + #if (UINTPTR_MAX > UINT32_MAX) + // by Sebastiano Vigna, see: + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9UL; + x ^= x >> 27; + x *= 0x94d049bb133111ebUL; + x ^= x >> 31; + #else + // by Chris Wellons, see: + x ^= x >> 16; + x *= 0x7feb352dUL; + x ^= x >> 15; + x *= 0x846ca68bUL; + x ^= x >> 16; + #endif + *r = x; + return x; +} + +static bool chance(size_t perc, random_t r) { + return (pick(r) % 100 < perc); // strict '<' so that perc=1 hits 1 in 100, not 2 +} + +static void* alloc_items(size_t items, random_t r) { + if (chance(1, r)) items *= 100; // 1% huge objects; if (items==40) items++; // pthreads uses that size for stack increases - uintptr_t* p = mi_mallocn_tp(uintptr_t,items); - if(p == NULL) return NULL; + uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; return p; } @@ -47,7 +76,7 @@ static void free_items(void* p) { uintptr_t items = (q[0] ^ cookie); for (uintptr_t i = 0; i < items; i++) { if ((q[i]^cookie) != items - i) { - fprintf(stderr,"memory corruption at block %p at 
%zu\n", p, i); + fprintf(stderr, 
"memory corruption at block %p at %zu\n", p, i); abort(); } } @@ -57,43 +86,45 @@ static void free_items(void* p) { static void stress(intptr_t tid) { + //bench_start_thread(); + uintptr_t r = ((uintptr_t)tid + 1) * 43; // seed is never 0: pick() maps 0 to 0, so the old tid^42 froze the sequence of thread 42 const size_t max_item = 128; // in words const size_t max_item_retained = 10*max_item; - size_t allocs = 80*N*(tid%8 + 1); // some threads do more + size_t allocs = 25*N*(tid%8 + 1); // some threads do more size_t retain = allocs/2; void** data = NULL; size_t data_size = 0; size_t data_top = 0; - void** retained = mi_mallocn_tp(void*,retain); + void** retained = (void**)mi_malloc(retain*sizeof(void*)); size_t retain_top = 0; while (allocs>0 || retain>0) { - if (retain == 0 || ((rand()%4 == 0) && allocs > 0)) { - // 75% alloc + if (retain == 0 || (chance(50, &r) && allocs > 0)) { + // 50%+ alloc allocs--; if (data_top >= data_size) { data_size += 100000; - data = mi_reallocn_tp(data, void*, data_size); + data = (void**)mi_realloc(data, data_size*sizeof(void*)); } - data[data_top++] = alloc_items((rand() % max_item) + 1); + data[data_top++] = alloc_items((pick(&r) % max_item) + 1, &r); } else { // 25% retain - retained[retain_top++] = alloc_items( 10*((rand() % max_item_retained) + 1) ); + retained[retain_top++] = alloc_items(10*((pick(&r) % max_item_retained) + 1), &r); retain--; } - if ((rand()%3)!=0 && data_top > 0) { + if (chance(66, &r) && data_top > 0) { // 66% free previous alloc - size_t idx = rand() % data_top; + size_t idx = pick(&r) % data_top; free_items(data[idx]); - data[idx]=NULL; + data[idx] = NULL; } - if ((tid%2)==0 && (rand()%4)==0 && data_top > 0) { - // 25% transfer-swap of half the threads - size_t data_idx = rand() % data_top; - size_t transfer_idx = rand() % TRANSFERS; + if (chance(25, &r) && data_top > 0) { + // 25% transfer-swap + size_t data_idx = pick(&r) % data_top; + size_t transfer_idx = pick(&r) % TRANSFERS; void* p = data[data_idx]; - void* q = 
mi_atomic_exchange_ptr(&transfer[transfer_idx],p); + void* q = 
atomic_exchange_ptr(&transfer[transfer_idx], p); data[data_idx] = q; } } @@ -106,20 +137,33 @@ static void stress(intptr_t tid) { } mi_free(retained); mi_free(data); + //bench_end_thread(); } -static void run_os_threads(); +static void run_os_threads(size_t nthreads); -int main() { - srand(42); - memset((void*)transfer,0,TRANSFERS*sizeof(void*)); - run_os_threads(); +int main(int argc, char** argv) { + if (argc>=2) { + char* end; + long n = strtol(argv[1], &end, 10); + if (n > 0) THREADS = n; + } + if (argc>=3) { + char* end; + long n = (strtol(argv[2], &end, 10)); + if (n > 0) N = n; + } + printf("start with %i threads with a %i%% load-per-thread\n", THREADS, N); + //bench_start_program(); + memset((void*)transfer, 0, TRANSFERS*sizeof(void*)); + run_os_threads(THREADS); for (int i = 0; i < TRANSFERS; i++) { free_items((void*)transfer[i]); } - mi_collect(false); // ensures abandoned segments are reclaimed - mi_collect(true); // frees everything + mi_collect(false); + mi_collect(true); mi_stats_print(NULL); + //bench_end_program(); return 0; } @@ -133,36 +177,52 @@ static DWORD WINAPI thread_entry(LPVOID param) { return 0; } -static void run_os_threads() { - DWORD tids[THREADS]; - HANDLE thandles[THREADS]; - for(intptr_t i = 0; i < THREADS; i++) { - thandles[i] = CreateThread(0,4096,&thread_entry,(void*)(i),0,&tids[i]); +static void run_os_threads(size_t nthreads) { + DWORD* tids = (DWORD*)malloc(nthreads * sizeof(DWORD)); + HANDLE* thandles = (HANDLE*)malloc(nthreads * sizeof(HANDLE)); + for (uintptr_t i = 0; i < nthreads; i++) { + thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); } - for (int i = 0; i < THREADS; i++) { + for (size_t i = 0; i < nthreads; i++) { WaitForSingleObject(thandles[i], INFINITE); + CloseHandle(thandles[i]); // release the thread handle once the thread has finished } + free(tids); + free(thandles); } +static void* atomic_exchange_ptr(volatile void** p, void* newval) { + #if (INTPTR_MAX == INT32_MAX) // 32-bit pointers; INTPTR_MAX can never equal UINT32_MAX + return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval); + #else + return (void*)InterlockedExchange64((volatile 
LONG64*)p, (LONG64)newval); + #endif +} #else #include +#include -static void* thread_entry( void* param ) { +static void* thread_entry(void* param) { stress((uintptr_t)param); return NULL; } -static void run_os_threads() { - pthread_t threads[THREADS]; - memset(threads,0,sizeof(pthread_t)*THREADS); - //pthread_setconcurrency(THREADS); - for(uintptr_t i = 0; i < THREADS; i++) { +static void run_os_threads(size_t nthreads) { + pthread_t* threads = (pthread_t*)mi_malloc(nthreads*sizeof(pthread_t)); + memset(threads, 0, sizeof(pthread_t)*nthreads); + //pthread_setconcurrency(nthreads); + for (uintptr_t i = 0; i < nthreads; i++) { pthread_create(&threads[i], NULL, &thread_entry, (void*)i); } - for (size_t i = 0; i < THREADS; i++) { + for (size_t i = 0; i < nthreads; i++) { pthread_join(threads[i], NULL); } + mi_free(threads); // otherwise still live when main calls mi_stats_print } +static void* atomic_exchange_ptr(volatile void** p, void* newval) { + return atomic_exchange_explicit((volatile _Atomic(void*)*)p, newval, memory_order_acq_rel); // release publishes the block handed over, acquire covers the block taken +} + #endif