diff --git a/CMakeLists.txt b/CMakeLists.txt
index c184a0b3..57b49584 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,6 +19,7 @@ option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON)
+option(MI_WIN_USE_FIXED_TLS "Use a fixed TLS slot on Windows to avoid extra tests in the malloc fast path" OFF)
option(MI_LOCAL_DYNAMIC_TLS "Use local-dynamic-tls, a slightly slower but dlopen-compatible thread local storage mechanism (Unix)" OFF)
option(MI_LIBC_MUSL "Enable this when linking with musl libc" OFF)
@@ -40,7 +41,7 @@ option(MI_NO_THP "Disable transparent huge pages support on Linux/And
option(MI_EXTRA_CPPDEFS "Extra pre-processor definitions (use as `-DMI_EXTRA_CPPDEFS=\"opt1=val1;opt2=val2\"`)" "")
# deprecated options
-option(MI_WIN_USE_FLS "Use Fiber local storage on Windows to detect thread termination" OFF)
+option(MI_WIN_USE_FLS "Use Fiber local storage on Windows to detect thread termination (deprecated)" OFF)
option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF)
option(MI_USE_LIBATOMIC "Explicitly link with -latomic (on older systems) (deprecated and detected automatically)" OFF)
@@ -327,10 +328,15 @@ if(MI_LIBC_MUSL)
endif()
if(MI_WIN_USE_FLS)
- message(STATUS "Use the Fiber API to detect thread termination (MI_WIN_USE_FLS=ON)")
+ message(STATUS "Use the Fiber API to detect thread termination (deprecated) (MI_WIN_USE_FLS=ON)")
list(APPEND mi_defines MI_WIN_USE_FLS=1)
endif()
+if(MI_WIN_USE_FIXED_TLS)
+ message(STATUS "Use fixed TLS slot on Windows to avoid extra tests in the malloc fast path (MI_WIN_USE_FIXED_TLS=ON)")
+ list(APPEND mi_defines MI_WIN_USE_FIXED_TLS=1)
+endif()
+
# Determine architecture
set(MI_OPT_ARCH_FLAGS "")
set(MI_ARCH "unknown")
@@ -424,7 +430,7 @@ if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914) # vs2017+
endif()
if(MINGW)
- add_definitions(-D_WIN32_WINNT=0x600)
+ add_definitions(-D_WIN32_WINNT=0x601) # issue #976
endif()
if(MI_OPT_ARCH_FLAGS)
diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc-lib.vcxproj
similarity index 99%
rename from ide/vs2022/mimalloc.vcxproj
rename to ide/vs2022/mimalloc-lib.vcxproj
index 87e866bb..c82dbec7 100644
--- a/ide/vs2022/mimalloc.vcxproj
+++ b/ide/vs2022/mimalloc-lib.vcxproj
@@ -37,7 +37,7 @@
15.0
{ABB5EAE7-B3E6-432E-B636-333449892EA6}
- mimalloc
+ mimalloc-lib
10.0
mimalloc-lib
diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override-dll.vcxproj
similarity index 99%
rename from ide/vs2022/mimalloc-override.vcxproj
rename to ide/vs2022/mimalloc-override-dll.vcxproj
index 609fd3ba..fbae9aeb 100644
--- a/ide/vs2022/mimalloc-override.vcxproj
+++ b/ide/vs2022/mimalloc-override-dll.vcxproj
@@ -37,7 +37,7 @@
15.0
{ABB5EAE7-B3E6-432E-B636-333449892EA7}
- mimalloc-override
+ mimalloc-override-dll
10.0
mimalloc-override-dll
@@ -404,11 +404,10 @@
-
-
+
@@ -482,9 +481,6 @@
-
-
-
diff --git a/ide/vs2022/mimalloc-override-test.vcxproj b/ide/vs2022/mimalloc-override-test.vcxproj
index 0e87cf36..427a75ae 100644
--- a/ide/vs2022/mimalloc-override-test.vcxproj
+++ b/ide/vs2022/mimalloc-override-test.vcxproj
@@ -344,7 +344,7 @@
-
+
{abb5eae7-b3e6-432e-b636-333449892ea7}
diff --git a/ide/vs2022/mimalloc-override.vcxproj.filters b/ide/vs2022/mimalloc-override.vcxproj.filters
deleted file mode 100644
index fb48e98f..00000000
--- a/ide/vs2022/mimalloc-override.vcxproj.filters
+++ /dev/null
@@ -1,113 +0,0 @@
-
-
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
-
-
- {9ef1cf48-7bb2-4af1-8cc1-603486e08a7a}
-
-
- {cfcf1674-81e3-487a-a8dd-5f956ae4007d}
-
-
-
-
- Headers
-
-
-
\ No newline at end of file
diff --git a/ide/vs2022/mimalloc-test-api.vcxproj b/ide/vs2022/mimalloc-test-api.vcxproj
index 27247569..b7f97ad2 100644
--- a/ide/vs2022/mimalloc-test-api.vcxproj
+++ b/ide/vs2022/mimalloc-test-api.vcxproj
@@ -282,7 +282,7 @@
-
+
{abb5eae7-b3e6-432e-b636-333449892ea6}
diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj
index fd88cd8e..cb761f94 100644
--- a/ide/vs2022/mimalloc-test-stress.vcxproj
+++ b/ide/vs2022/mimalloc-test-stress.vcxproj
@@ -279,7 +279,7 @@
-
+
{abb5eae7-b3e6-432e-b636-333449892ea6}
diff --git a/ide/vs2022/mimalloc-test.vcxproj b/ide/vs2022/mimalloc-test.vcxproj
index 6e4576fd..83202dbe 100644
--- a/ide/vs2022/mimalloc-test.vcxproj
+++ b/ide/vs2022/mimalloc-test.vcxproj
@@ -276,7 +276,7 @@
-
+
{abb5eae7-b3e6-432e-b636-333449892ea6}
diff --git a/ide/vs2022/mimalloc.sln b/ide/vs2022/mimalloc.sln
index 5a55c98b..040af3ac 100644
--- a/ide/vs2022/mimalloc.sln
+++ b/ide/vs2022/mimalloc.sln
@@ -3,11 +3,11 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.12.35527.113
MinimumVisualStudioVersion = 10.0.40219.1
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc", "mimalloc.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA6}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-lib", "mimalloc-lib.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA6}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-test", "mimalloc-test.vcxproj", "{FEF7858F-750E-4C21-A04D-22707CC66878}"
EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override", "mimalloc-override.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA7}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override-dll", "mimalloc-override-dll.vcxproj", "{ABB5EAE7-B3E6-432E-B636-333449892EA7}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mimalloc-override-test", "mimalloc-override-test.vcxproj", "{FEF7868F-750E-4C21-A04D-22707CC66879}"
EndProject
diff --git a/ide/vs2022/mimalloc.vcxproj.filters b/ide/vs2022/mimalloc.vcxproj.filters
deleted file mode 100644
index 06b0364f..00000000
--- a/ide/vs2022/mimalloc.vcxproj.filters
+++ /dev/null
@@ -1,105 +0,0 @@
-
-
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
- Sources
-
-
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
- Headers
-
-
-
-
- {dd2da697-c33c-4348-bf80-a802ebaa06fb}
-
-
- {8027057b-4b93-4321-b93c-d51dd0c8077b}
-
-
-
\ No newline at end of file
diff --git a/include/mimalloc.h b/include/mimalloc.h
index dacc647e..7383ce8a 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -326,7 +326,7 @@ mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min,
//mi_decl_export void mi_os_decommit(void* p, size_t size);
mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* size);
-mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* arena_id);
+mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* arena_id);
mi_decl_export bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena);
mi_decl_export void mi_heap_unload(mi_heap_t* heap);
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 49472bdb..ae8839c0 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -425,7 +425,7 @@ static inline bool mi_heap_is_backing(const mi_heap_t* heap) {
static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
- return (heap != &_mi_heap_empty);
+ return (heap != NULL && heap != &_mi_heap_empty); // a NULL heap also counts as "not initialized", alongside the _mi_heap_empty sentinel
}
static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) {
diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h
index 2d681062..12889b8b 100644
--- a/include/mimalloc/prim.h
+++ b/include/mimalloc/prim.h
@@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
-Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
+Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@@ -130,6 +130,7 @@ bool _mi_prim_thread_is_in_threadpool(void);
// for each thread (unequal to zero).
//-------------------------------------------------------------------
+
// On some libc + platform combinations we can directly access a thread-local storage (TLS) slot.
// The TLS layout depends on both the OS and libc implementation so we use specific tests for each main platform.
// If you test on another platform and it works please send a PR :-)
@@ -207,22 +208,40 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce
#endif
}
-#elif 0 && _MSC_VER && _WIN32
-// On Windows, using a fixed TLS slot has better codegen than a thread-local
-// but it might clash with an application trying to use the same slot. (so we disable this by default)
-#include
+#elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS)
-#define MI_HAS_TLS_SLOT
-#define MI_TLS_SLOT 63 // last available slot
+// On Windows we can store the thread-local heap at a fixed TLS slot to avoid
+// thread-local initialization checks in the fast path. This uses a fixed location
+// in the TCB though (last user-reserved slot by default) which may clash with other applications.
+
+#define MI_HAS_TLS_SLOT 2 // 2 = we can reliably initialize the slot (saving a test on each malloc)
+
+#if MI_WIN_USE_FIXED_TLS > 1
+#define MI_TLS_SLOT (MI_WIN_USE_FIXED_TLS)
+#elif MI_SIZE_SIZE == 4
+#define MI_TLS_SLOT (0x710) // Last user-reserved slot
+// #define MI_TLS_SLOT (0xF0C) // Last TlsSlot (might clash with other app reserved slot)
+#else
+#define MI_TLS_SLOT (0x888) // Last user-reserved slot
+// #define MI_TLS_SLOT (0x1678) // Last TlsSlot (might clash with other app reserved slot)
+#endif
static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept {
- return NtCurrentTeb()->TlsSlots[slot];
+ #if (_M_X64 || _M_AMD64) && !defined(_M_ARM64EC)
+ return (void*)__readgsqword((unsigned long)slot); // direct load at offset from gs
+ #elif _M_IX86 && !defined(_M_ARM64EC)
+ return (void*)__readfsdword((unsigned long)slot); // direct load at offset from fs
+ #else
+ return ((void**)NtCurrentTeb())[slot / sizeof(void*)];
+ #endif
}
static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
- NtCurrentTeb()->TlsSlots[slot] = value;
+ ((void**)NtCurrentTeb())[slot / sizeof(void*)] = value;
}
+
#endif
+
// Do we have __builtin_thread_pointer? This would be the preferred way to get a unique thread id
// but unfortunately, it seems we cannot test for this reliably at this time (see issue #883)
// Nevertheless, it seems needed on older graviton platforms (see issue #851).
@@ -337,12 +356,14 @@ static inline mi_heap_t* mi_prim_get_default_heap(void);
static inline mi_heap_t* mi_prim_get_default_heap(void) {
mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT);
+ #if (MI_HAS_TLS_SLOT + 0) < 2 // NULL-check the slot unless MI_HAS_TLS_SLOT >= 2 says it is reliably initialized; test the capability flag, not the slot offset MI_TLS_SLOT (`+ 0` tolerates a value-less define)
if mi_unlikely(heap == NULL) {
#ifdef __GNUC__
__asm(""); // prevent conditional load of the address of _mi_heap_empty
#endif
heap = (mi_heap_t*)&_mi_heap_empty;
}
+ #endif
return heap;
}
diff --git a/src/init.c b/src/init.c
index 6aa2495a..e9e6ce9e 100644
--- a/src/init.c
+++ b/src/init.c
@@ -97,7 +97,12 @@ const mi_page_t _mi_page_empty = {
// may lead to allocation itself on some platforms)
// --------------------------------------------------------
-static mi_decl_cache_align mi_subproc_t subproc_main = { 0 }; // note: empty initializer to prevent running the constructor (in C++ compilation)
+static mi_decl_cache_align mi_subproc_t subproc_main
+#if __cplusplus
+= { }; // empty initializer to prevent running the constructor (with msvc)
+#else
+= { 0 }; // C zero initialize
+#endif
static mi_decl_cache_align mi_tld_t tld_empty = {
0, // thread_id
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c
index e06b278d..63023271 100644
--- a/src/prim/windows/prim.c
+++ b/src/prim/windows/prim.c
@@ -639,6 +639,11 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) {
static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
MI_UNUSED(reserved);
MI_UNUSED(module);
+ #if (MI_HAS_TLS_SLOT + 0) >= 2 // capability level 2: the fixed TLS slot is initialized here on attach (test MI_HAS_TLS_SLOT, not the slot offset MI_TLS_SLOT)
+ if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) {
+ _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty);
+ }
+ #endif
if (reason==DLL_PROCESS_ATTACH) {
_mi_process_load();
}
@@ -647,7 +652,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
}
else if (reason==DLL_THREAD_DETACH && !_mi_is_redirected()) {
_mi_thread_done(NULL);
- }
+ }
}
@@ -800,6 +805,11 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
#endif
mi_decl_export void _mi_redirect_entry(DWORD reason) {
// called on redirection; careful as this may be called before DllMain
+ #if (MI_HAS_TLS_SLOT + 0) >= 2 // capability level 2: the fixed TLS slot is initialized here on attach (test MI_HAS_TLS_SLOT, not the slot offset MI_TLS_SLOT)
+ if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) {
+ _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty);
+ }
+ #endif
if (reason == DLL_PROCESS_ATTACH) {
mi_redirected = true;
}
diff --git a/test/test-stress.c b/test/test-stress.c
index 653c0a1a..80623ebf 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -57,7 +57,7 @@ static int ITER = 50;
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int SCALE = 50; // scaling factor
static int ITER = 50; // N full iterations destructing and re-creating all threads
-#endif
+#endif
@@ -256,11 +256,11 @@ static void test_stress(void) {
}
#ifndef NDEBUG
//mi_collect(false);
- //mi_debug_show_arenas();
+ //mi_debug_show_arenas(true);
#endif
#if !defined(NDEBUG) || defined(MI_TSAN)
- if ((n + 1) % 10 == 0) {
- printf("- iterations left: %3d\n", ITER - (n + 1));
+ if ((n + 1) % 10 == 0) {
+ printf("- iterations left: %3d\n", ITER - (n + 1));
mi_debug_show_arenas(true);
//mi_collect(true);
//mi_debug_show_arenas(true);
@@ -274,7 +274,7 @@ static void test_stress(void) {
free_items(p);
}
}
-}
+}
#ifndef STRESS
static void leak(intptr_t tid) {
@@ -350,17 +350,9 @@ int main(int argc, char** argv) {
#ifndef USE_STD_MALLOC
#ifndef NDEBUG
- //mi_debug_show_arenas(true);
mi_debug_show_arenas(true);
- //mi_collect(true);
- //mi_debug_show_arenas(true);
- #else
- //mi_collect(true);
- mi_debug_show_arenas(true);
- mi_stats_print(NULL);
+ mi_collect(true);
#endif
-#else
- mi_stats_print(NULL); // so we see rss/commit/elapsed
#endif
mi_stats_print(NULL);
//bench_end_program();