diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 377bedbf..72783400 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -46,6 +46,14 @@ jobs: BuildType: release cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -A Win32 MSBuildConfiguration: Release + Debug Fixed TLS: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_WIN_USE_FIXED_TLS=ON + MSBuildConfiguration: Debug + Release Fixed TLS: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_WIN_USE_FIXED_TLS=ON + MSBuildConfiguration: Release steps: - task: CMake@1 inputs: diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index 128a4ff6..d6af71ce 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 56b73544..ab085c8b 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -210,19 +210,18 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // On windows we can store the thread-local heap at a fixed TLS slot to avoid // thread-local initialization checks in the fast path. -// We always use the second user TLS slot (the first one is always allocated already), -// and at initialization (`windows/prim.c`) we call TlsAlloc and verify -// we indeed get the second slot (and fail otherwise). -// Todo: we could make the Tls slot completely dynamic but that would require -// an extra read of the static Tls slot instead of using a constant offset. +// We allocate a user TLS slot at process initialization (see `windows/prim.c`) +// and store the offset `_mi_win_tls_offset`. 
#define MI_HAS_TLS_SLOT 2 // 2 = we can reliably initialize the slot (saving a test on each malloc) +extern mi_decl_hidden size_t _mi_win_tls_offset; // byte offset of the allocated slot, added to the user TLS slot base below; defined in `windows/prim.c` when MI_WIN_USE_FIXED_TLS==1 + #if MI_WIN_USE_FIXED_TLS > 1 #define MI_TLS_SLOT (MI_WIN_USE_FIXED_TLS) #elif MI_SIZE_SIZE == 4 -#define MI_TLS_SLOT (0x0E18) // Second User TLS slot +#define MI_TLS_SLOT (0x0E10 + _mi_win_tls_offset) // User TLS slots base + byte offset of the allocated slot #else -#define MI_TLS_SLOT (0x1488) // Second User TLS slot +#define MI_TLS_SLOT (0x1480 + _mi_win_tls_offset) // User TLS slots base + byte offset of the allocated slot #endif static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { @@ -271,9 +270,6 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // defined in `init.c`; do not use these directly -#ifdef _MSC_VER -__declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) -#endif extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? 
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 96259040..780cae18 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -646,22 +646,29 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { // Process & Thread Init/Done //---------------------------------------------------------------- +#if MI_WIN_USE_FIXED_TLS==1 +mi_decl_cache_align size_t _mi_win_tls_offset = sizeof(void*); // use 2nd slot by default; also the fallback if no usable slot can be allocated +#endif + static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(module); - #if MI_TLS_SLOT >= 2 + #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation + #if MI_WIN_USE_FIXED_TLS==1 if (reason==DLL_PROCESS_ATTACH) { const DWORD tls_slot = TlsAlloc(); - if (tls_slot != 1) { - _mi_error_message(EFAULT, "unable to allocate the second TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + if (tls_slot >= TLS_MINIMUM_AVAILABLE) { // only indices below TLS_MINIMUM_AVAILABLE lie in the inline slot array addressed by `MI_TLS_SLOT`; this test also catches TLS_OUT_OF_INDEXES + _mi_error_message(EFAULT, "unable to allocate a fixed TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); } + else { _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); } } + #endif if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { if (mi_prim_get_default_heap() == NULL) { _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); } - #if MI_DEBUG - void* const p = TlsGetValue(1); + #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 + void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); mi_assert_internal(p == (void*)&_mi_heap_empty); #endif } @@ -827,7 +834,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #endif mi_decl_export void _mi_redirect_entry(DWORD reason) { // called on redirection; careful as this may be called before DllMain - #if MI_TLS_SLOT >= 2 + #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { 
_mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); }