Merge branch 'dev-exp-tls' into dev-exp

daan 2020-02-09 18:34:23 -08:00
commit 609703a7f3
11 changed files with 338 additions and 184 deletions

CMakeLists.txt

@@ -5,11 +5,12 @@ set(CMAKE_C_STANDARD 11)
 set(CMAKE_CXX_STANDARD 17)
 
 option(MI_OVERRIDE "Override the standard malloc interface" ON)
-option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
 option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode" OFF)
 option(MI_SECURE "Use full security mitigations (like guard pages, allocation randomization, double-free mitigation, and free-list corruption detection)" OFF)
 option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF)
 option(MI_SEE_ASM "Generate assembly files" OFF)
+option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
+option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" OFF) # enables interpose as well
 option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
 option(MI_BUILD_TESTS "Build test executables" ON)
 option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF)
@@ -61,14 +62,19 @@ endif()
 if(MI_OVERRIDE MATCHES "ON")
   message(STATUS "Override standard malloc (MI_OVERRIDE=ON)")
   if(APPLE)
+    if(MI_OSX_ZONE MATCHES "ON")
+      # use zones on macOS
+      message(STATUS "  Use malloc zone to override malloc (MI_OSX_ZONE=ON)")
+      list(APPEND mi_sources src/alloc-override-osx.c)
+      if(NOT MI_INTERPOSE MATCHES "ON")
+        message(STATUS "  (enabling INTERPOSE as well since zones require this)")
+        set(MI_INTERPOSE "ON")
+      endif()
+    endif()
     if(MI_INTERPOSE MATCHES "ON")
       # use interpose on macOS
       message(STATUS "  Use interpose to override malloc (MI_INTERPOSE=ON)")
       list(APPEND mi_defines MI_INTERPOSE)
-    else()
-      # use zone's on macOS
-      message(STATUS "  Use zone's to override malloc (MI_INTERPOSE=OFF)")
-      list(APPEND mi_sources src/alloc-override-osx.c)
     endif()
   endif()
 endif()
@@ -247,7 +253,7 @@ if (MI_BUILD_TESTS MATCHES "ON")
   target_compile_definitions(mimalloc-test-stress PRIVATE ${mi_defines})
   target_compile_options(mimalloc-test-stress PRIVATE ${mi_cflags})
   target_include_directories(mimalloc-test-stress PRIVATE include)
-  target_link_libraries(mimalloc-test-stress PRIVATE mimalloc-static ${mi_libraries})
+  target_link_libraries(mimalloc-test-stress PRIVATE mimalloc ${mi_libraries})
 
   enable_testing()
   add_test(test_api, mimalloc-test-api)

include/mimalloc-internal.h

@@ -10,10 +10,6 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "mimalloc-types.h"
 
-#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__))
-#define MI_TLS_RECURSE_GUARD
-#endif
-
 #if (MI_DEBUG>0)
 #define mi_trace_message(...)  _mi_trace_message(__VA_ARGS__)
 #else
@@ -51,6 +47,7 @@ void _mi_random_init(mi_random_ctx_t* ctx);
 void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
 uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
 uintptr_t _mi_heap_random_next(mi_heap_t* heap);
+uintptr_t _os_random_weak(uintptr_t extra_seed);
 static inline uintptr_t _mi_random_shuffle(uintptr_t x);
 
 // init.c
@@ -270,26 +267,76 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot
 }
 
-/* -----------------------------------------------------------
-  The thread local default heap
------------------------------------------------------------ */
+/* ----------------------------------------------------------------------------------------
+The thread local default heap: `_mi_get_default_heap` returns the thread local heap.
+On most platforms (Windows, Linux, FreeBSD, NetBSD, etc.) this just returns a
+__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures
+that the storage will always be available (allocated on the thread stacks).
+On some platforms though we cannot use that when overriding `malloc` since the underlying
+TLS implementation (or the loader) will itself call `malloc` on a first access and recurse.
+We try to circumvent this in an efficient way:
+- macOS    : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT); the
+             loader itself calls `malloc` even before the modules are initialized.
+- OpenBSD  : we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS).
+- DragonFly: not yet working.
+------------------------------------------------------------------------------------------- */
 
 extern const mi_heap_t _mi_heap_empty;  // read-only empty heap, initial value of the thread local default heap
-extern mi_heap_t _mi_heap_main;         // statically allocated main backing heap
 extern bool _mi_process_is_initialized;
+mi_heap_t*  _mi_heap_main_get(void);    // statically allocated main backing heap
+
+#if defined(MI_MALLOC_OVERRIDE)
+#if defined(__MACH__) // macOS
+#define MI_TLS_SLOT   89  // seems unused?
+// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
+// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
+#elif defined(__OpenBSD__)
+// use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16)
+// see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
+#define MI_TLS_PTHREAD_SLOT_OFS   (6*sizeof(int) + 4*sizeof(void*) + 24)
+#elif defined(__DragonFly__)
+#warning "mimalloc is not working correctly on DragonFly yet."
+#define MI_TLS_PTHREAD_SLOT_OFS   (4 + 1*sizeof(void*))  // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
+#endif
+#endif
+
+#if defined(MI_TLS_SLOT)
+static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept;  // forward declaration
+#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
+#include <pthread.h>
+static inline mi_heap_t** mi_tls_pthread_heap_slot(void) {
+  pthread_t self = pthread_self();
+  #if defined(__DragonFly__)
+  if (self==NULL) {
+    static mi_heap_t* pheap_main = _mi_heap_main_get();
+    return &pheap_main;
+  }
+  #endif
+  return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS);
+}
+#elif defined(MI_TLS_PTHREAD)
+#include <pthread.h>
+extern pthread_key_t _mi_heap_default_key;
+#else
 extern mi_decl_thread mi_heap_t* _mi_heap_default;  // default heap to allocate from
+#endif
 
 static inline mi_heap_t* mi_get_default_heap(void) {
-#ifdef MI_TLS_RECURSE_GUARD
-  // on some BSD platforms, like macOS, the dynamic loader calls `malloc`
-  // to initialize thread local data. To avoid recursion, we need to avoid
-  // accessing the thread local `_mi_default_heap` until our module is loaded
-  // and use the statically allocated main heap until that time.
-  // TODO: patch ourselves dynamically to avoid this check every time?
-  if (!_mi_process_is_initialized) return &_mi_heap_main;
-#endif
+#if defined(MI_TLS_SLOT)
  mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT);
  return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
+#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
  mi_heap_t* heap = *mi_tls_pthread_heap_slot();
  return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
+#elif defined(MI_TLS_PTHREAD)
  mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
  return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
+#else
  #if defined(MI_TLS_RECURSE_GUARD)
  if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
  #endif
  return _mi_heap_default;
+#endif
 }
 
 static inline bool mi_heap_is_default(const mi_heap_t* heap) {
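
Aside (not part of the commit): the `MI_TLS_PTHREAD` branch above amounts to replacing a `__thread` variable with a process-wide pthread key. A self-contained sketch of that pattern, with made-up names (`g_heap_key`, `g_main`); compile with `-pthread`:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_key_t g_heap_key;
static int g_main;   // stand-in for the statically allocated main heap

static void make_key(void) { pthread_key_create(&g_heap_key, NULL); }

// mirrors the MI_TLS_PTHREAD branch: use the key once it exists,
// fall back to the static object before anything was stored
static void* get_default(void) {
  static pthread_once_t once = PTHREAD_ONCE_INIT;
  pthread_once(&once, make_key);
  void* h = pthread_getspecific(g_heap_key);
  return (h == NULL ? (void*)&g_main : h);
}

int main(void) {
  printf("before set: %p (the fallback)\n", get_default());
  pthread_setspecific(g_heap_key, &g_heap_key);  // simulate installing a heap
  printf("after set : %p\n", get_default());
  return 0;
}
```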
@@ -306,6 +353,8 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
 }
 
 static inline uintptr_t _mi_ptr_cookie(const void* p) {
+  extern mi_heap_t _mi_heap_main;
+  mi_assert_internal(_mi_heap_main.cookie != 0);
   return ((uintptr_t)p ^ _mi_heap_main.cookie);
 }
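
Why the XOR in `_mi_ptr_cookie` works for pointer cookies: XOR with a fixed secret is its own inverse, so the same operation encodes and decodes, and a forged value is off by an unpredictable mask. A quick standalone check (the cookie value here is illustrative, not the one mimalloc uses):

```c
#include <stdint.h>
#include <assert.h>

int main(void) {
  uintptr_t cookie = (uintptr_t)0x9e3779b97f4a7c15u;  // stand-in for the random cookie
  uintptr_t p      = (uintptr_t)&cookie;              // any pointer value
  uintptr_t enc    = p ^ cookie;                      // encode
  assert((enc ^ cookie) == p);                        // decoding recovers the pointer
  return 0;
}
```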
@@ -619,9 +668,8 @@ static inline size_t _mi_os_numa_node_count(void) {
 
 // -------------------------------------------------------------------
-// Getting the thread id should be performant
-// as it is called in the fast path of `_mi_free`,
-// so we specialize for various platforms.
+// Getting the thread id should be performant as it is called in the
+// fast path of `_mi_free` and we specialize for various platforms.
 // -------------------------------------------------------------------
 #if defined(_WIN32)
 #define WIN32_LEAN_AND_MEAN
@@ -630,24 +678,55 @@ static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
   // Windows: works on Intel and ARM in both 32- and 64-bit
   return (uintptr_t)NtCurrentTeb();
 }
-#elif (defined(__GNUC__) || defined(__clang__)) && \
+#elif defined(__GNUC__) && \
       (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))
-// TLS register on x86 is in the FS or GS register
-// see: https://akkadia.org/drepper/tls.pdf
+
+// TLS register on x86 is in the FS or GS register, see: https://akkadia.org/drepper/tls.pdf
+static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept {
+  void* res;
+  const size_t ofs = (slot*sizeof(void*));
+  #if defined(__i386__)
+  __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // 32-bit always uses GS
+  #elif defined(__MACH__) && defined(__x86_64__)
+  __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 macOS uses GS
+  #elif defined(__x86_64__)
+  __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 Linux, BSD uses FS
+  #elif defined(__arm__)
+  void** tcb; UNUSED(ofs);
+  asm volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
+  res = tcb[slot];
+  #elif defined(__aarch64__)
+  void** tcb; UNUSED(ofs);
+  asm volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
+  res = tcb[slot];
+  #endif
+  return res;
+}
+
+// setting a TLS slot is only used on macOS for now
+static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
+  const size_t ofs = (slot*sizeof(void*));
+  #if defined(__i386__)
+  __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // 32-bit always uses GS
+  #elif defined(__MACH__) && defined(__x86_64__)
+  __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 macOS uses GS
+  #elif defined(__x86_64__)
+  __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 Linux, BSD uses FS
+  #elif defined(__arm__)
+  void** tcb; UNUSED(ofs);
+  asm volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
+  tcb[slot] = value;
+  #elif defined(__aarch64__)
+  void** tcb; UNUSED(ofs);
+  asm volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
+  tcb[slot] = value;
+  #endif
+}
+
 static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
-  uintptr_t tid;
-  #if defined(__i386__)
-  __asm__("movl %%gs:0, %0" : "=r" (tid) : : );  // 32-bit always uses GS
-  #elif defined(__MACH__)
-  __asm__("movq %%gs:0, %0" : "=r" (tid) : : );  // x86_64 macOS uses GS
-  #elif defined(__x86_64__)
-  __asm__("movq %%fs:0, %0" : "=r" (tid) : : );  // x86_64 Linux, BSD uses FS
-  #elif defined(__arm__)
-  asm volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r" (tid));
-  #elif defined(__aarch64__)
-  asm volatile ("mrs %0, tpidr_el0" : "=r" (tid));
-  #endif
-  return tid;
+  // in all our targets, slot 0 is the pointer to the thread control block
+  return (uintptr_t)mi_tls_slot(0);
 }
 #else
 // otherwise use standard C
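
The rewritten `_mi_thread_id` leans on slot 0 of the thread control block being a self-pointer on all targeted platforms. A standalone way to observe this (a sketch assuming x86_64 Linux with glibc, where the thread pointer also equals `pthread_self()`; compile with `-pthread`):

```c
#include <stdio.h>
#include <stdint.h>
#include <pthread.h>

// same addressing trick as mi_tls_slot above: %fs:ofs on x86_64 Linux
static void* tls_slot(size_t slot) {
  void* res;
  const size_t ofs = slot * sizeof(void*);
  __asm__("movq %%fs:%1, %0" : "=r"(res) : "m"(*((void**)ofs)));
  return res;
}

int main(void) {
  // on glibc/x86_64 both lines print the same address (the TCB self-pointer)
  printf("slot 0      : %p\n", tls_slot(0));
  printf("pthread_self: %p\n", (void*)pthread_self());
  return 0;
}
```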

include/mimalloc-new-delete.h

@@ -32,8 +32,8 @@ terms of the MIT license. A copy of the license can be found in the file
   void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); }
 
   #if (__cplusplus >= 201402L || _MSC_VER >= 1916)
-  void operator delete  (void* p, std::size_t n)          { mi_free_size(p,n); };
-  void operator delete[](void* p, std::size_t n)          { mi_free_size(p,n); };
+  void operator delete  (void* p, std::size_t n) noexcept { mi_free_size(p,n); };
+  void operator delete[](void* p, std::size_t n) noexcept { mi_free_size(p,n); };
   #endif
 
   #if (__cplusplus > 201402L || defined(__cpp_aligned_new))

src/alloc-override-osx.c

@@ -17,6 +17,12 @@ terms of the MIT license. A copy of the license can be found in the file
 /* ------------------------------------------------------
    Override system malloc on macOS
    This is done through the malloc zone interface.
+   It seems we also need to interpose (see `alloc-override.c`);
+   otherwise we get zone errors since there are usually
+   already allocations done by the time we take over the
+   zone. Unfortunately, that means we need to replace
+   `free` with a checked free (`cfree`), impacting
+   performance.
 ------------------------------------------------------ */
 
 #include <AvailabilityMacros.h>
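
The checked free mentioned in the new comment can be pictured as below; this is a sketch of the idea behind mimalloc's `mi_cfree`, using the public `mi_is_in_heap_region` ownership test:

```c
#include <mimalloc.h>

// only free pointers that mimalloc actually owns; allocations made by the
// previous default zone (before we took over) are silently ignored
void checked_free(void* p) {
  if (mi_is_in_heap_region(p)) {
    mi_free(p);
  }
}
```

The extra region check on every `free` is the performance cost the comment refers to.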
@@ -35,34 +41,42 @@ extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_im
 ------------------------------------------------------ */
 
 static size_t zone_size(malloc_zone_t* zone, const void* p) {
+  UNUSED(zone); UNUSED(p);
   return 0; // as we cannot guarantee that `p` comes from us, just return 0
 }
 
 static void* zone_malloc(malloc_zone_t* zone, size_t size) {
+  UNUSED(zone);
   return mi_malloc(size);
 }
 
 static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) {
+  UNUSED(zone);
   return mi_calloc(count, size);
 }
 
 static void* zone_valloc(malloc_zone_t* zone, size_t size) {
+  UNUSED(zone);
   return mi_malloc_aligned(size, _mi_os_page_size());
 }
 
 static void zone_free(malloc_zone_t* zone, void* p) {
+  UNUSED(zone);
   return mi_free(p);
 }
 
 static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) {
+  UNUSED(zone);
   return mi_realloc(p, newsize);
 }
 
 static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) {
+  UNUSED(zone);
   return mi_malloc_aligned(size,alignment);
 }
 
 static void zone_destroy(malloc_zone_t* zone) {
+  UNUSED(zone);
   // todo: ignore for now?
 }
 
@@ -83,11 +97,13 @@ static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) {
 }
 
 static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) {
+  UNUSED(zone); UNUSED(size);
   mi_collect(false);
   return 0;
 }
 
 static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) {
+  UNUSED(size);
   zone_free(zone,p);
 }
 
@@ -102,34 +118,43 @@ static kern_return_t intro_enumerator(task_t task, void* p,
                                       vm_range_recorder_t recorder)
 {
   // todo: enumerate all memory
+  UNUSED(task); UNUSED(p); UNUSED(type_mask); UNUSED(zone_address);
+  UNUSED(reader); UNUSED(recorder);
   return KERN_SUCCESS;
 }
 
 static size_t intro_good_size(malloc_zone_t* zone, size_t size) {
+  UNUSED(zone);
   return mi_good_size(size);
 }
 
 static boolean_t intro_check(malloc_zone_t* zone) {
+  UNUSED(zone);
   return true;
 }
 
 static void intro_print(malloc_zone_t* zone, boolean_t verbose) {
+  UNUSED(zone); UNUSED(verbose);
   mi_stats_print(NULL);
 }
 
 static void intro_log(malloc_zone_t* zone, void* p) {
+  UNUSED(zone); UNUSED(p);
   // todo?
 }
 
 static void intro_force_lock(malloc_zone_t* zone) {
+  UNUSED(zone);
   // todo?
 }
 
 static void intro_force_unlock(malloc_zone_t* zone) {
+  UNUSED(zone);
   // todo?
 }
 
 static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) {
+  UNUSED(zone);
   // todo...
   stats->blocks_in_use = 0;
   stats->size_in_use = 0;
@@ -138,6 +163,7 @@ static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) {
 }
 
 static boolean_t intro_zone_locked(malloc_zone_t* zone) {
+  UNUSED(zone);
   return false;
 }
 
@@ -161,7 +187,6 @@ static malloc_zone_t* mi_get_default_zone()
   }
 }
 
-
 static void __attribute__((constructor)) _mi_macos_override_malloc()
 {
   static malloc_introspection_t intro;
@@ -201,6 +226,7 @@ static void __attribute__((constructor)) _mi_macos_override_malloc()
   zone.free_definite_size = &zone_free_definite_size;
   zone.pressure_relief = &zone_pressure_relief;
   intro.zone_locked = &intro_zone_locked;
+  intro.statistics = &intro_statistics;
 
   // force the purgeable zone to exist to avoid strange bugs
   if (malloc_default_purgeable_zone) {
@@ -225,6 +251,7 @@ static void __attribute__((constructor)) _mi_macos_override_malloc()
     malloc_zone_unregister(purgeable_zone);
     malloc_zone_register(purgeable_zone);
   }
+
 }
 
 #endif // MI_MALLOC_OVERRIDE

src/alloc-override.c

@@ -13,7 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)"
 #endif
 
-#if defined(MI_MALLOC_OVERRIDE) && !defined(_WIN32)
+#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32)) // || (defined(__MACH__) && !defined(MI_INTERPOSE)))
 
 // ------------------------------------------------------
 // Override system malloc
@@ -47,26 +47,31 @@ terms of the MIT license. A copy of the license can be found in the file
     const void* replacement;
     const void* target;
   };
-  #define MI_INTERPOSEX(oldfun,newfun)     { (const void*)&newfun, (const void*)&oldfun }
-  #define MI_INTERPOSE_MI(fun)             MI_INTERPOSEX(fun,mi_##fun)
+  #define MI_INTERPOSE_FUN(oldfun,newfun)  { (const void*)&newfun, (const void*)&oldfun }
+  #define MI_INTERPOSE_MI(fun)             MI_INTERPOSE_FUN(fun,mi_##fun)
   __attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) =
   {
     MI_INTERPOSE_MI(malloc),
     MI_INTERPOSE_MI(calloc),
     MI_INTERPOSE_MI(realloc),
-    MI_INTERPOSE_MI(free),
     MI_INTERPOSE_MI(strdup),
-    MI_INTERPOSE_MI(strndup)
+    MI_INTERPOSE_MI(strndup),
+    MI_INTERPOSE_MI(realpath),
+    MI_INTERPOSE_MI(posix_memalign),
+    MI_INTERPOSE_MI(reallocf),
+    MI_INTERPOSE_MI(valloc),
+    // some code allocates from a zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
+    MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us
   };
 #elif defined(_MSC_VER)
   // cannot override malloc unless using a dll.
   // we just override new/delete which does work in a static library.
 #else
   // On all other systems forward to our API
-  void* malloc(size_t size)              mi_attr_noexcept MI_FORWARD1(mi_malloc, size);
-  void* calloc(size_t size, size_t n)    mi_attr_noexcept MI_FORWARD2(mi_calloc, size, n);
-  void* realloc(void* p, size_t newsize) mi_attr_noexcept MI_FORWARD2(mi_realloc, p, newsize);
-  void  free(void* p)                    mi_attr_noexcept MI_FORWARD0(mi_free, p);
+  void* malloc(size_t size)              MI_FORWARD1(mi_malloc, size);
+  void* calloc(size_t size, size_t n)    MI_FORWARD2(mi_calloc, size, n);
+  void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize);
+  void  free(void* p)                    MI_FORWARD0(mi_free, p);
 #endif
 
 #if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__)
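
For context on the `__DATA,__interpose` section used above: at load time dyld rebinds every `(replacement, original)` pair listed there, except for calls made from the interposing image itself, which is why a wrapper can still reach the real function. A minimal standalone interposer (not mimalloc code; `my_malloc` is a made-up name; build as a dylib and load it via `DYLD_INSERT_LIBRARIES`):

```c
#include <stdlib.h>
#include <unistd.h>

static void* my_malloc(size_t n) {
  void* p = malloc(n);        // not rebound here: reaches the real malloc
  write(2, "malloc\n", 7);    // write(), not printf(), to avoid re-entering malloc
  return p;
}

__attribute__((used)) static struct { const void* repl; const void* orig; }
  my_interpose[] __attribute__((section("__DATA,__interpose"))) = {
  { (const void*)&my_malloc, (const void*)&malloc },
};
```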
@@ -94,8 +99,8 @@ terms of the MIT license. A copy of the license can be found in the file
   void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); }
 
   #if (__cplusplus >= 201402L || _MSC_VER >= 1916)
-  void operator delete  (void* p, std::size_t n)          MI_FORWARD02(mi_free_size,p,n);
-  void operator delete[](void* p, std::size_t n)          MI_FORWARD02(mi_free_size,p,n);
+  void operator delete  (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n);
+  void operator delete[](void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n);
   #endif
 
   #if (__cplusplus > 201402L || defined(__cpp_aligned_new)) && (!defined(__GNUC__) || (__GNUC__ > 5))
@@ -194,4 +199,3 @@ int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_me
 #endif
 
 #endif // MI_MALLOC_OVERRIDE && !_WIN32
-

src/init.c

@@ -107,6 +107,8 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
 #define tld_main_stats  ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats)))
 #define tld_main_os     ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os)))
 
+extern mi_heap_t _mi_heap_main;
+
 static mi_tld_t tld_main = {
   0, false,
   &_mi_heap_main,
@@ -118,21 +120,15 @@ static mi_tld_t tld_main = {
   { MI_STATS_NULL }       // stats
 };
 
-#if MI_INTPTR_SIZE==8
-#define MI_INIT_COOKIE  (0xCDCDCDCDCDCDCDCDUL)
-#else
-#define MI_INIT_COOKIE  (0xCDCDCDCDUL)
-#endif
-
 mi_heap_t _mi_heap_main = {
   &tld_main,
   MI_SMALL_PAGES_EMPTY,
   MI_PAGE_QUEUES_EMPTY,
   ATOMIC_VAR_INIT(NULL),
   0,                      // thread id
-  MI_INIT_COOKIE,         // initial cookie
-  { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
-  { {0}, {0}, 0 },        // random
+  0,                      // initial cookie
+  { 0, 0 },               // the key of the main heap can be fixed (unlike page keys that need to be secure!)
+  { {0x846ca68b}, {0}, 0 },  // random
   0,                      // page count
   false                   // can reclaim
 };
@@ -142,6 +138,22 @@ bool _mi_process_is_initialized = false;  // set to `true` in `mi_process_init`.
 
 mi_stats_t _mi_stats_main = { MI_STATS_NULL };
 
+static void mi_heap_main_init(void) {
+  if (_mi_heap_main.cookie == 0) {
+    _mi_heap_main.thread_id = _mi_thread_id();
+    _mi_heap_main.cookie = _os_random_weak((uintptr_t)&mi_heap_main_init);
+    _mi_random_init(&_mi_heap_main.random);
+    _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main);
+    _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
+  }
+}
+
+mi_heap_t* _mi_heap_main_get(void) {
+  mi_heap_main_init();
+  return &_mi_heap_main;
+}
+
 /* -----------------------------------------------------------
    Initialization and freeing of the thread local heaps
 ----------------------------------------------------------- */
@@ -154,11 +166,13 @@ typedef struct mi_thread_data_s {
 
 // Initialize the thread local default heap, called from `mi_thread_init`
 static bool _mi_heap_init(void) {
-  if (mi_heap_is_initialized(_mi_heap_default)) return true;
+  if (mi_heap_is_initialized(mi_get_default_heap())) return true;
   if (_mi_is_main_thread()) {
+    // mi_assert_internal(_mi_heap_main.thread_id != 0);  // can happen on FreeBSD where alloc is called before any initialization
     // the main heap is statically allocated
+    mi_heap_main_init();
     _mi_heap_set_default_direct(&_mi_heap_main);
-    mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
+    //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
   }
   else {
     // use `_mi_os_alloc` to allocate directly from the OS
@@ -253,14 +267,15 @@ static void _mi_thread_done(mi_heap_t* default_heap);
   // use thread local storage keys to detect thread ending
   #include <windows.h>
   #include <fibersapi.h>
-  static DWORD mi_fls_key;
+  static DWORD mi_fls_key = (DWORD)(-1);
   static void NTAPI mi_fls_done(PVOID value) {
     if (value!=NULL) _mi_thread_done((mi_heap_t*)value);
   }
 #elif defined(MI_USE_PTHREADS)
-  // use pthread locol storage keys to detect thread ending
+  // use pthread local storage keys to detect thread ending
+  // (and used with MI_TLS_PTHREAD for the default heap)
   #include <pthread.h>
-  static pthread_key_t mi_pthread_key;
+  pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1);
   static void mi_pthread_done(void* value) {
     if (value!=NULL) _mi_thread_done((mi_heap_t*)value);
   }
@@ -280,8 +295,10 @@ static void mi_process_setup_auto_thread_done(void) {
   #elif defined(_WIN32) && !defined(MI_SHARED_LIB)
     mi_fls_key = FlsAlloc(&mi_fls_done);
   #elif defined(MI_USE_PTHREADS)
-    pthread_key_create(&mi_pthread_key, &mi_pthread_done);
+    mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
+    pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
   #endif
+  _mi_heap_set_default_direct(&_mi_heap_main);
 }
@@ -323,21 +340,31 @@ static void _mi_thread_done(mi_heap_t* heap) {
 
 void _mi_heap_set_default_direct(mi_heap_t* heap)  {
   mi_assert_internal(heap != NULL);
+  #if defined(MI_TLS_SLOT)
+  mi_tls_slot_set(MI_TLS_SLOT,heap);
+  #elif defined(MI_TLS_PTHREAD_SLOT_OFS)
+  *mi_tls_pthread_heap_slot() = heap;
+  #elif defined(MI_TLS_PTHREAD)
+  // we use _mi_heap_default_key
+  #else
   _mi_heap_default = heap;
+  #endif
 
   // ensure the default heap is passed to `_mi_thread_done`
   // setting to a non-NULL value also ensures `mi_thread_done` is called.
   #if defined(_WIN32) && defined(MI_SHARED_LIB)
     // nothing to do as it is done in DllMain
   #elif defined(_WIN32) && !defined(MI_SHARED_LIB)
+    mi_assert_internal(mi_fls_key != 0);
     FlsSetValue(mi_fls_key, heap);
   #elif defined(MI_USE_PTHREADS)
-    pthread_setspecific(mi_pthread_key, heap);
+    if (_mi_heap_default_key != (pthread_key_t)(-1)) {  // can happen during recursive invocation on FreeBSD
+      pthread_setspecific(_mi_heap_default_key, heap);
+    }
   #endif
 }
 
 // --------------------------------------------------------
 // Run functions on process init/done, and thread init/done
 // --------------------------------------------------------
@@ -389,11 +416,16 @@ static void mi_allocator_done() {
 // Called once by the process loader
 static void mi_process_load(void) {
+  mi_heap_main_init();
+  #if defined(MI_TLS_RECURSE_GUARD)
+  volatile mi_heap_t* dummy = _mi_heap_default;  // access TLS to allocate it before setting tls_initialized to true
+  UNUSED(dummy);
+  #endif
   os_preloading = false;
   atexit(&mi_process_done);
   _mi_options_init();
   mi_process_init();
   //mi_stats_reset();
   if (mi_redirected) _mi_verbose_message("malloc is redirected.\n");
 
   // show message from the redirector (if present)
@@ -408,22 +440,12 @@ static void mi_process_load(void) {
 void mi_process_init(void) mi_attr_noexcept {
   // ensure we are called once
   if (_mi_process_is_initialized) return;
-  // access _mi_heap_default before setting _mi_process_is_initialized to ensure
-  // that the TLS slot is allocated without getting into recursion on macOS
-  // when using dynamic linking with interpose.
-  mi_get_default_heap();
   _mi_process_is_initialized = true;
-  _mi_heap_main.thread_id = _mi_thread_id();
-  _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id);
-  _mi_random_init(&_mi_heap_main.random);
-  #ifndef __APPLE__  // TODO: fix this? cannot update cookie if allocation already happened..
-  _mi_heap_main.cookie  = _mi_heap_random_next(&_mi_heap_main);
-  _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main);
-  _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
-  #endif
   mi_process_setup_auto_thread_done();
+  _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
   _mi_os_init();
+  mi_heap_main_init();
   #if (MI_DEBUG)
   _mi_verbose_message("debug level : %d\n", MI_DEBUG);
   #endif

src/options.c

@@ -70,7 +70,11 @@ static mi_option_desc_t options[_mi_option_last] =
   { 1, UNINIT, MI_OPTION(page_reset) },          // reset page memory on free
   { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates
   { 0, UNINIT, MI_OPTION(segment_reset) },       // reset segment memory on free (needs eager commit)
+#if defined(__NetBSD__)
+  { 0, UNINIT, MI_OPTION(eager_commit_delay) },  // the first N segments per thread are not eagerly committed
+#else
   { 1, UNINIT, MI_OPTION(eager_commit_delay) },  // the first N segments per thread are not eagerly committed
+#endif
   { 100, UNINIT, MI_OPTION(reset_delay) },       // reset delay in milli-seconds
   { 0, UNINIT, MI_OPTION(use_numa_nodes) },      // 0 = use available numa nodes, otherwise use at most N nodes.
   { 100, UNINIT, MI_OPTION(os_tag) },            // only apple specific for now but might serve more or less related purpose
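
The per-platform default above can also be overridden at run time, either with the `MIMALLOC_EAGER_COMMIT_DELAY` environment variable or through the public option API; a small usage sketch:

```c
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // read the compiled-in/environment default, then mimic the NetBSD default of 0
  printf("eager_commit_delay = %ld\n", mi_option_get(mi_option_eager_commit_delay));
  mi_option_set(mi_option_eager_commit_delay, 0);
  return 0;
}
```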
@@ -239,16 +243,30 @@ static volatile _Atomic(uintptr_t) error_count; // = 0;  // when MAX_ERROR_COUNT
 // inside the C runtime causes another message.
 static mi_decl_thread bool recurse = false;
 
+static bool mi_recurse_enter(void) {
+  #ifdef MI_TLS_RECURSE_GUARD
+  if (_mi_preloading()) return true;
+  #endif
+  if (recurse) return false;
+  recurse = true;
+  return true;
+}
+
+static void mi_recurse_exit(void) {
+  #ifdef MI_TLS_RECURSE_GUARD
+  if (_mi_preloading()) return;
+  #endif
+  recurse = false;
+}
+
 void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) {
-  if (recurse) return;
+  if (!mi_recurse_enter()) return;
   if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr?
     out = mi_out_get_default(&arg);
   }
-  recurse = true;
   if (prefix != NULL) out(prefix,arg);
   out(message,arg);
-  recurse = false;
+  mi_recurse_exit();
+  return;
 }
 
 // Define our own limited `fprintf` that avoids memory allocation.
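
`mi_recurse_enter`/`mi_recurse_exit` form a thread-local re-entrancy latch; stripped of the preloading special case, the pattern looks like this generic sketch (not mimalloc code):

```c
#include <stdio.h>
#include <stdbool.h>

static _Thread_local bool in_log = false;

// non-reentrant per thread: a message emitted while already emitting one
// (say, a warning raised inside the output path) is dropped, not recursed on
static void log_msg(const char* msg) {
  if (in_log) return;
  in_log = true;
  fputs(msg, stderr);
  in_log = false;
}

int main(void) { log_msg("hello\n"); return 0; }
```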
@@ -256,14 +274,12 @@ void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* me
 static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) {
   char buf[512];
   if (fmt==NULL) return;
-  if (recurse) return;
-  recurse = true;
+  if (!mi_recurse_enter()) return;
   vsnprintf(buf,sizeof(buf)-1,fmt,args);
-  recurse = false;
+  mi_recurse_exit();
   _mi_fputs(out,arg,prefix,buf);
 }
 
 void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) {
   va_list args;
   va_start(args,fmt);

src/random.c

@@ -241,8 +241,8 @@ static bool os_random_buf(void* buf, size_t buf_len) {
 #include <time.h>
 #endif
 
-static uintptr_t os_random_weak(uintptr_t extra_seed) {
-  uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random
+uintptr_t _os_random_weak(uintptr_t extra_seed) {
+  uintptr_t x = (uintptr_t)&_os_random_weak ^ extra_seed; // ASLR makes the address random
   #if defined(_WIN32)
     LARGE_INTEGER pcount;
     QueryPerformanceCounter(&pcount);
@@ -270,7 +270,7 @@ void _mi_random_init(mi_random_ctx_t* ctx) {
     // if we fail to get random data from the OS, we fall back to a
     // weak random source based on the current time
     _mi_warning_message("unable to use secure randomness\n");
-    uintptr_t x = os_random_weak(0);
+    uintptr_t x = _os_random_weak(0);
     for (size_t i = 0; i < 8; i++) {  // key is eight 32-bit words.
       x = _mi_random_shuffle(x);
       ((uint32_t*)key)[i] = (uint32_t)x;

src/segment.c

@@ -161,7 +161,7 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t*
     }
   }
   mi_assert_internal(nfree + segment->used == segment->capacity);
-  mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0
+  // mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0
   mi_assert_internal(segment->page_kind == MI_PAGE_HUGE ||
      (mi_segment_page_size(segment) * segment->capacity == segment->segment_size));
   return true;

test/test-stress.c

@@ -188,7 +188,7 @@ static void test_stress(void) {
       free_items(p);
     }
   }
-  mi_collect(false);
+  // mi_collect(false);
 #ifndef NDEBUG
   if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
 #endif
@@ -242,14 +242,14 @@ int main(int argc, char** argv) {
   // Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
   srand(0x7feb352d);
-  mi_stats_reset();
+  // mi_stats_reset();
 #ifdef STRESS
   test_stress();
 #else
   test_leak();
 #endif
 
-  mi_collect(true);
+  // mi_collect(true);
   mi_stats_print(NULL);
   //bench_end_program();
   return 0;