diff --git a/CMakeLists.txt b/CMakeLists.txt index c184a0b3..57b49584 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,7 @@ option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON) option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON) option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON) +option(MI_WIN_USE_FIXED_TLS "Use a fixed TLS slot on Windows to avoid extra tests in the malloc fast path" OFF) option(MI_LOCAL_DYNAMIC_TLS "Use local-dynamic-tls, a slightly slower but dlopen-compatible thread local storage mechanism (Unix)" OFF) option(MI_LIBC_MUSL "Enable this when linking with musl libc" OFF) @@ -40,7 +41,7 @@ option(MI_NO_THP "Disable transparent huge pages support on Linux/And option(MI_EXTRA_CPPDEFS "Extra pre-processor definitions (use as `-DMI_EXTRA_CPPDEFS=\"opt1=val1;opt2=val2\"`)" "") # deprecated options -option(MI_WIN_USE_FLS "Use Fiber local storage on Windows to detect thread termination" OFF) +option(MI_WIN_USE_FLS "Use Fiber local storage on Windows to detect thread termination (deprecated)" OFF) option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) option(MI_USE_LIBATOMIC "Explicitly link with -latomic (on older systems) (deprecated and detected automatically)" OFF) @@ -327,10 +328,15 @@ if(MI_LIBC_MUSL) endif() if(MI_WIN_USE_FLS) - message(STATUS "Use the Fiber API to detect thread termination (MI_WIN_USE_FLS=ON)") + message(STATUS "Use the Fiber API to detect thread termination (deprecated) (MI_WIN_USE_FLS=ON)") list(APPEND mi_defines MI_WIN_USE_FLS=1) endif() +if(MI_WIN_USE_FIXED_TLS) + message(STATUS "Use fixed TLS slot on Windows to avoid extra tests in the malloc fast path (MI_WIN_USE_FIXED_TLS=ON)") + list(APPEND mi_defines MI_WIN_USE_FIXED_TLS=1) +endif() + # Determine architecture set(MI_OPT_ARCH_FLAGS 
"") set(MI_ARCH "unknown") @@ -424,7 +430,7 @@ if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914) # vs2017+ endif() if(MINGW) - add_definitions(-D_WIN32_WINNT=0x600) + add_definitions(-D_WIN32_WINNT=0x601) # issue #976 endif() if(MI_OPT_ARCH_FLAGS) diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc-lib.vcxproj similarity index 99% rename from ide/vs2022/mimalloc.vcxproj rename to ide/vs2022/mimalloc-lib.vcxproj index 87e866bb..c82dbec7 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc-lib.vcxproj @@ -37,7 +37,7 @@ 15.0 {ABB5EAE7-B3E6-432E-B636-333449892EA6} - mimalloc + mimalloc-lib 10.0 mimalloc-lib diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override-dll.vcxproj similarity index 99% rename from ide/vs2022/mimalloc-override.vcxproj rename to ide/vs2022/mimalloc-override-dll.vcxproj index 609fd3ba..fbae9aeb 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override-dll.vcxproj @@ -37,7 +37,7 @@ 15.0 {ABB5EAE7-B3E6-432E-B636-333449892EA7} - mimalloc-override + mimalloc-override-dll 10.0 mimalloc-override-dll @@ -404,11 +404,10 @@ - - + @@ -482,9 +481,6 @@ - - - diff --git a/ide/vs2022/mimalloc-override-test.vcxproj b/ide/vs2022/mimalloc-override-test.vcxproj index 0e87cf36..427a75ae 100644 --- a/ide/vs2022/mimalloc-override-test.vcxproj +++ b/ide/vs2022/mimalloc-override-test.vcxproj @@ -344,7 +344,7 @@ - + {abb5eae7-b3e6-432e-b636-333449892ea7} diff --git a/ide/vs2022/mimalloc-override.vcxproj.filters b/ide/vs2022/mimalloc-override.vcxproj.filters deleted file mode 100644 index fb48e98f..00000000 --- a/ide/vs2022/mimalloc-override.vcxproj.filters +++ /dev/null @@ -1,113 +0,0 @@ - - - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - - - Headers - - - Headers 
- - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - - - {9ef1cf48-7bb2-4af1-8cc1-603486e08a7a} - - - {cfcf1674-81e3-487a-a8dd-5f956ae4007d} - - - - - Headers - - - \ No newline at end of file diff --git a/ide/vs2022/mimalloc-test-api.vcxproj b/ide/vs2022/mimalloc-test-api.vcxproj index 27247569..b7f97ad2 100644 --- a/ide/vs2022/mimalloc-test-api.vcxproj +++ b/ide/vs2022/mimalloc-test-api.vcxproj @@ -282,7 +282,7 @@ - + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index fd88cd8e..cb761f94 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -279,7 +279,7 @@ - + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/ide/vs2022/mimalloc-test.vcxproj b/ide/vs2022/mimalloc-test.vcxproj index 6e4576fd..83202dbe 100644 --- a/ide/vs2022/mimalloc-test.vcxproj +++ b/ide/vs2022/mimalloc-test.vcxproj @@ -276,7 +276,7 @@ - + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/ide/vs2022/mimalloc.sln b/ide/vs2022/mimalloc.sln index 5a55c98b..040af3ac 100644 --- a/ide/vs2022/mimalloc.sln +++ b/ide/vs2022/mimalloc.sln @@ -3,11 +3,11 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 VisualStudioVersion = 17.12.35527.113 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc", "mimalloc.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA6}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-lib", "mimalloc-lib.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA6}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test", "mimalloc-test.vcxproj", "{FEF7858F-750E-4C21-A04D-22707CC66878}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override", "mimalloc-override.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA7}" 
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override-dll", "mimalloc-override-dll.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA7}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override-test", "mimalloc-override-test.vcxproj", "{FEF7868F-750E-4C21-A04D-22707CC66879}" EndProject diff --git a/ide/vs2022/mimalloc.vcxproj.filters b/ide/vs2022/mimalloc.vcxproj.filters deleted file mode 100644 index 06b0364f..00000000 --- a/ide/vs2022/mimalloc.vcxproj.filters +++ /dev/null @@ -1,105 +0,0 @@ - - - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - Sources - - - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - Headers - - - - - {dd2da697-c33c-4348-bf80-a802ebaa06fb} - - - {8027057b-4b93-4321-b93c-d51dd0c8077b} - - - \ No newline at end of file diff --git a/include/mimalloc.h b/include/mimalloc.h index dacc647e..7383ce8a 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -326,7 +326,7 @@ mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, //mi_decl_export void mi_os_decommit(void* p, size_t size); mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* size); -mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* arena_id); +mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* arena_id); mi_decl_export bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena); mi_decl_export void mi_heap_unload(mi_heap_t* heap); diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 49472bdb..ae8839c0 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -425,7 +425,7 @@ 
static inline bool mi_heap_is_backing(const mi_heap_t* heap) { static inline bool mi_heap_is_initialized(mi_heap_t* heap) { mi_assert_internal(heap != NULL); - return (heap != &_mi_heap_empty); + return (heap != NULL && heap != &_mi_heap_empty); } static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) { diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 2d681062..12889b8b 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -130,6 +130,7 @@ bool _mi_prim_thread_is_in_threadpool(void); // for each thread (unequal to zero). //------------------------------------------------------------------- + // On some libc + platform combinations we can directly access a thread-local storage (TLS) slot. // The TLS layout depends on both the OS and libc implementation so we use specific tests for each main platform. // If you test on another platform and it works please send a PR :-) @@ -207,22 +208,40 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #endif } -#elif 0 && _MSC_VER && _WIN32 -// On Windows, using a fixed TLS slot has better codegen than a thread-local -// but it might clash with an application trying to use the same slot. (so we disable this by default) -#include +#elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS) -#define MI_HAS_TLS_SLOT -#define MI_TLS_SLOT 63 // last available slot +// On windows we can store the thread-local heap at a fixed TLS slot to avoid +// thread-local initialization checks in the fast path. 
This uses a fixed location +// in the TCB though (last user-reserved slot by default) which may clash with other applications. + +#define MI_HAS_TLS_SLOT 2 // 2 = we can reliably initialize the slot (saving a test on each malloc) + +#if MI_WIN_USE_FIXED_TLS > 1 +#define MI_TLS_SLOT (MI_WIN_USE_FIXED_TLS) +#elif MI_SIZE_SIZE == 4 +#define MI_TLS_SLOT (0x710) // Last user-reserved slot +// #define MI_TLS_SLOT (0xF0C) // Last TlsSlot (might clash with other app reserved slot) +#else +#define MI_TLS_SLOT (0x888) // Last user-reserved slot +// #define MI_TLS_SLOT (0x1678) // Last TlsSlot (might clash with other app reserved slot) +#endif static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { - return NtCurrentTeb()->TlsSlots[slot]; + #if (_M_X64 || _M_AMD64) && !defined(_M_ARM64EC) + return (void*)__readgsqword((unsigned long)slot); // direct load at offset from gs + #elif _M_IX86 && !defined(_M_ARM64EC) + return (void*)__readfsdword((unsigned long)slot); // direct load at offset from fs + #else + return ((void**)NtCurrentTeb())[slot / sizeof(void*)]; + #endif } static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { - NtCurrentTeb()->TlsSlots[slot] = value; + ((void**)NtCurrentTeb())[slot / sizeof(void*)] = value; } + #endif + // Do we have __builtin_thread_pointer? This would be the preferred way to get a unique thread id // but unfortunately, it seems we cannot test for this reliably at this time (see issue #883) // Nevertheless, it seems needed on older graviton platforms (see issue #851). 
@@ -337,12 +356,14 @@ static inline mi_heap_t* mi_prim_get_default_heap(void); static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT); + #if (MI_HAS_TLS_SLOT+0) < 2 // check if the TLS slot is initialized; only skipped when MI_HAS_TLS_SLOT >= 2 (slot is reliably pre-initialized). The `+0` tolerates an empty MI_HAS_TLS_SLOT definition. if mi_unlikely(heap == NULL) { #ifdef __GNUC__ __asm(""); // prevent conditional load of the address of _mi_heap_empty #endif heap = (mi_heap_t*)&_mi_heap_empty; } + #endif return heap; } diff --git a/src/init.c b/src/init.c index 6aa2495a..e9e6ce9e 100644 --- a/src/init.c +++ b/src/init.c @@ -97,7 +97,12 @@ const mi_page_t _mi_page_empty = { // may lead to allocation itself on some platforms) // -------------------------------------------------------- -static mi_decl_cache_align mi_subproc_t subproc_main = { 0 }; // note: empty initializer to prevent running the constructor (in C++ compilation) +static mi_decl_cache_align mi_subproc_t subproc_main +#if __cplusplus += { }; // empty initializer to prevent running the constructor (with msvc) +#else += { 0 }; // C zero initialize +#endif static mi_decl_cache_align mi_tld_t tld_empty = { 0, // thread_id diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index e06b278d..63023271 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -639,6 +639,11 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(module); + #if MI_TLS_SLOT >= 2 + if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + } + #endif if (reason==DLL_PROCESS_ATTACH) { _mi_process_load(); } @@ -647,7 +652,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { } else if (reason==DLL_THREAD_DETACH && !_mi_is_redirected()) { _mi_thread_done(NULL); - } + } } @@ -800,6 +805,11 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, 
LPVOID reserved) { #endif mi_decl_export void _mi_redirect_entry(DWORD reason) { // called on redirection; careful as this may be called before DllMain + #if MI_TLS_SLOT >= 2 + if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + } + #endif if (reason == DLL_PROCESS_ATTACH) { mi_redirected = true; } diff --git a/test/test-stress.c b/test/test-stress.c index 653c0a1a..80623ebf 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -57,7 +57,7 @@ static int ITER = 50; static int THREADS = 32; // more repeatable if THREADS <= #processors static int SCALE = 50; // scaling factor static int ITER = 50; // N full iterations destructing and re-creating all threads -#endif +#endif @@ -256,11 +256,11 @@ static void test_stress(void) { } #ifndef NDEBUG //mi_collect(false); - //mi_debug_show_arenas(); + //mi_debug_show_arenas(true); #endif #if !defined(NDEBUG) || defined(MI_TSAN) - if ((n + 1) % 10 == 0) { - printf("- iterations left: %3d\n", ITER - (n + 1)); + if ((n + 1) % 10 == 0) { + printf("- iterations left: %3d\n", ITER - (n + 1)); mi_debug_show_arenas(true); //mi_collect(true); //mi_debug_show_arenas(true); @@ -274,7 +274,7 @@ static void test_stress(void) { free_items(p); } } -} +} #ifndef STRESS static void leak(intptr_t tid) { @@ -350,17 +350,9 @@ int main(int argc, char** argv) { #ifndef USE_STD_MALLOC #ifndef NDEBUG - //mi_debug_show_arenas(true); mi_debug_show_arenas(true); - //mi_collect(true); - //mi_debug_show_arenas(true); - #else - //mi_collect(true); - mi_debug_show_arenas(true); - mi_stats_print(NULL); + mi_collect(true); #endif -#else - mi_stats_print(NULL); // so we see rss/commit/elapsed #endif mi_stats_print(NULL); //bench_end_program();