From a83bca72b3f98b55c564713176c40e44698e2c43 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 17 Jun 2021 19:15:09 -0700 Subject: [PATCH] fixes for M1; disable interpose use zones; fix pedantic warnings --- CMakeLists.txt | 11 +++--- include/mimalloc-atomic.h | 4 +-- include/mimalloc-internal.h | 7 ++-- src/alloc-override-osx.c | 2 +- src/alloc-override.c | 68 ++++++++++++++++++------------------- test/test-stress.c | 7 ++-- 6 files changed, 48 insertions(+), 51 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 403251cf..b56953c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,8 +12,8 @@ option(MI_XMALLOC "Enable abort() call on memory allocation failure by option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF) option(MI_SEE_ASM "Generate assembly files" OFF) -option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON) -option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" OFF) # enables interpose as well +option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" OFF) +option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON) option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_SHARED "Build shared library" ON) option(MI_BUILD_STATIC "Build static library" ON) @@ -76,15 +76,12 @@ if(MI_OVERRIDE) # use zone's on macOS message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)") list(APPEND mi_sources src/alloc-override-osx.c) - list(APPEND mi_defines MI_OSX_ZONE=1) - if(NOT MI_INTERPOSE) - message(STATUS " (enabling INTERPOSE as well since zone's require this)") - set(MI_INTERPOSE "ON") - endif() + list(APPEND mi_defines MI_OSX_ZONE=1) endif() if(MI_INTERPOSE) # use interpose on macOS message(STATUS " Use interpose to override malloc (MI_INTERPOSE=ON)") + message(STATUS " WARNING: interpose does not seem to work reliably on the M1; use -DMI_OSX_ZONE=ON instead") list(APPEND mi_defines MI_INTERPOSE) endif() endif() diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index d82bcfce..dc48f0a2 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -295,7 +295,7 @@ static inline void mi_atomic_yield(void) { } #elif defined(__aarch64__) static inline void mi_atomic_yield(void) { - asm volatile("wfe"); + __asm__ volatile("wfe"); } #elif (defined(__arm__) && __ARM_ARCH__ >= 7) static inline void mi_atomic_yield(void) { @@ -307,7 +307,7 @@ static inline void mi_atomic_yield(void) { } #elif defined(__armel__) || defined(__ARMEL__) static inline void mi_atomic_yield(void) { - asm volatile ("nop" ::: "memory"); + __asm__ volatile ("nop" ::: "memory"); } #endif #elif defined(__sun) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 449893b7..4a803ff5 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -713,6 +713,7 @@ static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { void** tcb; UNUSED(ofs); #if defined(__APPLE__) // M1, issue #343 __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); + tcb = (void**)((uintptr_t)tcb & ~0x07UL); // clear lower 3 bits #else __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); #endif @@ -740,6 +741,7 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { void** tcb; UNUSED(ofs); #if defined(__APPLE__) // M1, issue #343 __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); + tcb = (void**)((uintptr_t)tcb & ~0x07UL); // clear lower 3 bits #else __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); #endif @@ -748,10 +750,7 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { } static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { -#if defined(__aarch64__) && defined(__APPLE__) // M1 - // on macOS on the M1, slot 0 does not seem to work, so we fall back to portable C for now. See issue #354 - return (uintptr_t)&_mi_heap_default; -#elif defined(__BIONIC__) && (defined(__arm__) || defined(__aarch64__)) +#if defined(__BIONIC__) && (defined(__arm__) || defined(__aarch64__)) // on Android, slot 1 is the thread ID (pointer to pthread internal struct) return (uintptr_t)mi_tls_slot(1); #else diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c index 3a46ecd9..f506d30a 100644 --- a/src/alloc-override-osx.c +++ b/src/alloc-override-osx.c @@ -64,7 +64,7 @@ static void* zone_valloc(malloc_zone_t* zone, size_t size) { static void zone_free(malloc_zone_t* zone, void* p) { UNUSED(zone); - return mi_free(p); + mi_free(p); } static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) { diff --git a/src/alloc-override.c b/src/alloc-override.c index 084f8ae4..6a87e7bd 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -22,9 +22,9 @@ terms of the MIT license. A copy of the license can be found in the file #if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) // use aliasing to alias the exported function to one of our `mi_` functions #if (defined(__GNUC__) && __GNUC__ >= 9) - #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"), copy(fun))) + #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"), copy(fun))); #else - #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"))) + #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"))); #endif #define MI_FORWARD1(fun,x) MI_FORWARD(fun) #define MI_FORWARD2(fun,x,y) MI_FORWARD(fun) @@ -75,10 +75,10 @@ terms of the MIT license. A copy of the license can be found in the file // we just override new/delete which does work in a static library. #else // On all other systems forward to our API - void* malloc(size_t size) MI_FORWARD1(mi_malloc, size); - void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n); - void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize); - void free(void* p) MI_FORWARD0(mi_free, p); + void* malloc(size_t size) MI_FORWARD1(mi_malloc, size) + void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n) + void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize) + void free(void* p) MI_FORWARD0(mi_free, p) #endif #if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) @@ -96,18 +96,18 @@ terms of the MIT license. A copy of the license can be found in the file // see // ------------------------------------------------------ #include - void operator delete(void* p) noexcept MI_FORWARD0(mi_free,p); - void operator delete[](void* p) noexcept MI_FORWARD0(mi_free,p); + void operator delete(void* p) noexcept MI_FORWARD0(mi_free,p) + void operator delete[](void* p) noexcept MI_FORWARD0(mi_free,p) - void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n); - void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n); + void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n) + void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n) void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); } void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); } #if (__cplusplus >= 201402L || _MSC_VER >= 1916) - void operator delete (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n); - void operator delete[](void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n); + void operator delete (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n) + void operator delete[](void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n) #endif #if (__cplusplus > 201402L && defined(__cpp_aligned_new)) && (!defined(__GNUC__) || (__GNUC__ > 5)) @@ -128,10 +128,10 @@ terms of the MIT license. A copy of the license can be found in the file // used by GCC and CLang). // See // ------------------------------------------------------ - void _ZdlPv(void* p) MI_FORWARD0(mi_free,p); // delete - void _ZdaPv(void* p) MI_FORWARD0(mi_free,p); // delete[] - void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n); - void _ZdaPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n); + void _ZdlPv(void* p) MI_FORWARD0(mi_free,p) // delete + void _ZdaPv(void* p) MI_FORWARD0(mi_free,p) // delete[] + void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n) + void _ZdaPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n) void _ZdlPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); } void _ZdaPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); } void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); } @@ -139,19 +139,19 @@ terms of the MIT license. A copy of the license can be found in the file typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; #if (MI_INTPTR_SIZE==8) - void* _Znwm(size_t n) MI_FORWARD1(mi_new,n); // new 64-bit - void* _Znam(size_t n) MI_FORWARD1(mi_new,n); // new[] 64-bit - void* _ZnwmSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al); - void* _ZnamSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al); + void* _Znwm(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit + void* _Znam(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit + void* _ZnwmSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) + void* _ZnamSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } #elif (MI_INTPTR_SIZE==4) - void* _Znwj(size_t n) MI_FORWARD1(mi_new,n); // new 64-bit - void* _Znaj(size_t n) MI_FORWARD1(mi_new,n); // new[] 64-bit - void* _ZnwjSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al); - void* _ZnajSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al); + void* _Znwj(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit + void* _Znaj(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit + void* _ZnwjSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) + void* _ZnajSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } @@ -170,13 +170,13 @@ extern "C" { // Posix & Unix functions definitions // ------------------------------------------------------ -void cfree(void* p) MI_FORWARD0(mi_free, p); -void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize); -size_t malloc_size(const void* p) MI_FORWARD1(mi_usable_size,p); +void cfree(void* p) MI_FORWARD0(mi_free, p) +void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize) +size_t malloc_size(const void* p) MI_FORWARD1(mi_usable_size,p) #if !defined(__ANDROID__) -size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p); +size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p) #else -size_t malloc_usable_size(const void *p) MI_FORWARD1(mi_usable_size,p); +size_t malloc_usable_size(const void *p) MI_FORWARD1(mi_usable_size,p) #endif // no forwarding here due to aliasing/name mangling issues @@ -199,11 +199,11 @@ void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(a #if defined(__GLIBC__) && defined(__linux__) // forward __libc interface (needed for glibc-based Linux distributions) - void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size); - void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size); - void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size); - void __libc_free(void* p) MI_FORWARD0(mi_free,p); - void __libc_cfree(void* p) MI_FORWARD0(mi_free,p); + void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size) + void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size) + void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size) + void __libc_free(void* p) MI_FORWARD0(mi_free,p) + void __libc_cfree(void* p) MI_FORWARD0(mi_free,p) void* __libc_valloc(size_t size) { return mi_valloc(size); } void* __libc_pvalloc(size_t size) { return mi_pvalloc(size); } diff --git a/test/test-stress.c b/test/test-stress.c index cf01dc40..d45e9899 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -37,6 +37,7 @@ static bool allow_large_objects = true; // allow very large objects? static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? +// #define USE_STD_MALLOC #ifdef USE_STD_MALLOC #define custom_calloc(n,s) calloc(n,s) #define custom_realloc(p,s) realloc(p,s) @@ -250,10 +251,10 @@ int main(int argc, char** argv) { test_leak(); #endif -#ifndef NDEBUG - mi_collect(true); -#endif #ifndef USE_STD_MALLOC + #ifndef NDEBUG + mi_collect(true); + #endif mi_stats_print(NULL); #endif //bench_end_program();