diff --git a/CMakeLists.txt b/CMakeLists.txt index 1387e0db..bd98019b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -128,7 +128,7 @@ endif() if(MI_SECURE) message(STATUS "Set full secure build (MI_SECURE=ON)") - list(APPEND mi_defines MI_SECURE=4) + list(APPEND mi_defines MI_SECURE=4) endif() if(MI_TRACK_VALGRIND) @@ -468,7 +468,7 @@ if (MI_BUILD_OBJECT) set(mimalloc-obj-static "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/mimalloc-obj.dir/src/static.c${CMAKE_C_OUTPUT_EXTENSION}") set(mimalloc-obj-out "${CMAKE_CURRENT_BINARY_DIR}/${mi_basename}${CMAKE_C_OUTPUT_EXTENSION}") add_custom_command(OUTPUT ${mimalloc-obj-out} DEPENDS mimalloc-obj COMMAND "${CMAKE_COMMAND}" -E copy "${mimalloc-obj-static}" "${mimalloc-obj-out}") - add_custom_target(mimalloc-obj-target ALL DEPENDS ${mimalloc-obj-out}) + add_custom_target(mimalloc-obj-target ALL DEPENDS ${mimalloc-obj-out}) endif() # the following seems to lead to cmake warnings/errors on some systems, disable for now :-( diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 9e560696..c3844d8b 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -35,10 +35,10 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ); // Free OS memory int _mi_prim_free(void* addr, size_t size ); - + // Allocate OS memory. Return NULL on error. // The `try_alignment` is just a hint and the returned pointer does not have to be aligned. -// If `commit` is false, the virtual memory range only needs to be reserved (with no access) +// If `commit` is false, the virtual memory range only needs to be reserved (with no access) // which will later be committed explicitly using `_mi_prim_commit`. // `is_zero` is set to true if the memory was zero initialized (as on most OS's) // pre: !commit => !allow_large @@ -82,11 +82,11 @@ mi_msecs_t _mi_prim_clock_now(void); typedef struct mi_process_info_s { mi_msecs_t elapsed; mi_msecs_t utime; - mi_msecs_t stime; - size_t current_rss; - size_t peak_rss; + mi_msecs_t stime; + size_t current_rss; + size_t peak_rss; size_t current_commit; - size_t peak_commit; + size_t peak_commit; size_t page_faults; } mi_process_info_t; @@ -117,7 +117,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); //------------------------------------------------------------------- // Thread id: `_mi_prim_thread_id()` -// +// // Getting the thread id should be performant as it is called in the // fast path of `_mi_free` and we specialize for various platforms as // inlined definitions. Regular code should call `init.c:_mi_thread_id()`. @@ -125,26 +125,14 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); // for each thread (unequal to zero). //------------------------------------------------------------------- -// defined in `init.c`; do not use these directly -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from -extern bool _mi_process_is_initialized; // has mi_process_init been called? - -static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; - -#if defined(_WIN32) - -#define WIN32_LEAN_AND_MEAN -#include -static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { - // Windows: works on Intel and ARM in both 32- and 64-bit - return (uintptr_t)NtCurrentTeb(); -} - -// We use assembly for a fast thread id on the main platforms. The TLS layout depends on -// both the OS and libc implementation so we use specific tests for each main platform. +// On some libc + platform combinations we can directly access a thread-local storage (TLS) slot. +// The TLS layout depends on both the OS and libc implementation so we use specific tests for each main platform. // If you test on another platform and it works please send a PR :-) // see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. -#elif defined(__GNUC__) && ( \ +// +// Note: on most platforms this is not actually used anymore as we prefer `__builtin_thread_pointer()` nowadays. +// However, we do still use it with older clang compilers and Apple OS (as we use TLS slot for the default heap there). +#if defined(__GNUC__) && ( \ (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \ || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ @@ -152,6 +140,8 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ ) +#define MI_HAS_TLS_SLOT + static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { void* res; const size_t ofs = (slot*sizeof(void*)); @@ -205,6 +195,33 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #endif } +#endif + +// defined in `init.c`; do not use these directly +extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern bool _mi_process_is_initialized; // has mi_process_init been called? + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; + +#if defined(_WIN32) + +#define WIN32_LEAN_AND_MEAN +#include +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + // Windows: works on Intel and ARM in both 32- and 64-bit + return (uintptr_t)NtCurrentTeb(); +} + +#elif defined(__has_builtin) && __has_builtin(__builtin_thread_pointer) && \ + (!defined(__clang_major__) || __clang_major__ >= 14) // older clang versions emit bad code; fall back to using the TLS slot () + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + // Works on most Unix based platforms + return (uintptr_t)__builtin_thread_pointer(); +} + +#elif defined(MI_HAS_TLS_SLOT) + static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { #if defined(__BIONIC__) // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id @@ -251,7 +268,6 @@ static inline mi_heap_t* mi_prim_get_default_heap(void); #if defined(MI_MALLOC_OVERRIDE) #if defined(__APPLE__) // macOS #define MI_TLS_SLOT 89 // seems unused? - // #define MI_TLS_RECURSE_GUARD 1 // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) // see #elif defined(__OpenBSD__) @@ -269,6 +285,9 @@ static inline mi_heap_t* mi_prim_get_default_heap(void); #if defined(MI_TLS_SLOT) +# if !defined(MI_HAS_TLS_SLOT) +# error "trying to use a TLS slot for the default heap, but the mi_prim_tls_slot primitives are not defined +# endif static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT);