merge from dev

This commit is contained in:
Daan 2021-11-02 22:42:25 -07:00
commit f3ffa663f1
8 changed files with 250 additions and 60 deletions

View file

@ -12,7 +12,7 @@ option(MI_XMALLOC "Enable abort() call on memory allocation failure by
option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF)
option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF)
option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" OFF)
option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
option(MI_BUILD_SHARED "Build shared library" ON)
@ -78,12 +78,17 @@ if(MI_OVERRIDE)
message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)")
list(APPEND mi_sources src/alloc-override-osx.c)
list(APPEND mi_defines MI_OSX_ZONE=1)
if (NOT MI_OSX_INTERPOSE)
message(STATUS " WARNING: zone overriding usually also needs interpose (use -DMI_OSX_INTERPOSE=ON)")
endif()
endif()
if(MI_OSX_INTERPOSE)
# use interpose on macOS
message(STATUS " Use interpose to override malloc (MI_OSX_INTERPOSE=ON)")
message(STATUS " WARNING: interpose does not seem to work reliably on the M1; use -DMI_OSX_ZONE=ON instead")
list(APPEND mi_defines MI_OSX_INTERPOSE)
if (NOT MI_OSX_ZONE)
message(STATUS " WARNING: interpose usually also needs zone overriding (use -DMI_OSX_INTERPOSE=ON)")
endif()
endif()
endif()
endif()
@ -188,6 +193,9 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM
else()
list(APPEND mi_cflags -ftls-model=initial-exec)
endif()
if(MI_OVERRIDE)
list(APPEND -fno-builtin-malloc)
endif()
endif()
if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914)
@ -364,7 +372,7 @@ if (MI_BUILD_TESTS)
target_compile_definitions(mimalloc-test-api PRIVATE ${mi_defines})
target_compile_options(mimalloc-test-api PRIVATE ${mi_cflags})
target_include_directories(mimalloc-test-api PRIVATE include)
target_link_libraries(mimalloc-test-api PRIVATE mimalloc-static ${mi_libraries})
target_link_libraries(mimalloc-test-api PRIVATE mimalloc ${mi_libraries})
add_executable(mimalloc-test-stress test/test-stress.c)
target_compile_definitions(mimalloc-test-stress PRIVATE ${mi_defines})

View file

@ -36,6 +36,12 @@ terms of the MIT license. A copy of the license can be found in the file
#define __wasi__
#endif
#if defined(__cplusplus)
#define mi_decl_externc extern "C"
#else
#define mi_decl_externc
#endif
// "options.c"
void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);
void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...);
@ -319,6 +325,7 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea
#if defined(MI_MALLOC_OVERRIDE)
#if defined(__APPLE__) // macOS
#define MI_TLS_SLOT 89 // seems unused?
// #define MI_TLS_RECURSE_GUARD 1
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
#elif defined(__OpenBSD__)
@ -356,10 +363,12 @@ extern pthread_key_t _mi_heap_default_key;
// However, on the Apple M1 we do use the address of this variable as the unique thread-id (issue #356).
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
static inline mi_heap_t* mi_get_default_heap(void) {
#if defined(MI_TLS_SLOT)
mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT);
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
if (mi_unlikely(heap == NULL)) { heap = (mi_heap_t*)&_mi_heap_empty; } //_mi_heap_empty_get(); }
return heap;
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
mi_heap_t* heap = *mi_tls_pthread_heap_slot();
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);

View file

@ -349,6 +349,7 @@ mi_decl_export void mi_option_set_default(mi_option_t option, long value);
mi_decl_export void mi_cfree(void* p) mi_attr_noexcept;
mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export size_t mi_malloc_good_size(size_t size) mi_attr_noexcept;
mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept;
mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept;

View file

@ -17,17 +17,20 @@ terms of the MIT license. A copy of the license can be found in the file
/* ------------------------------------------------------
Override system malloc on macOS
This is done through the malloc zone interface.
It seems we also need to interpose (see `alloc-override.c`)
or otherwise we get zone errors as there are usually
already allocations done by the time we take over the
zone. Unfortunately, that means we need to replace
the `free` with a checked free (`cfree`) impacting
performance.
It seems to be most robust in combination with interposing
though or otherwise we may get zone errors as there are could
be allocations done by the time we take over the
zone.
------------------------------------------------------ */
#include <AvailabilityMacros.h>
#include <malloc/malloc.h>
#include <string.h> // memset
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
#if defined(MAC_OS_X_VERSION_10_6) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
@ -41,9 +44,7 @@ extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_im
static size_t zone_size(malloc_zone_t* zone, const void* p) {
UNUSED(zone);
if (!mi_is_in_heap_region(p))
return 0; // not our pointer, bail out
//if (!mi_is_in_heap_region(p)){ return 0; } // not our pointer, bail out
return mi_usable_size(p);
}
@ -109,6 +110,11 @@ static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) {
zone_free(zone,p);
}
static boolean_t zone_claimed_address(malloc_zone_t* zone, void* p) {
UNUSED(zone);
return mi_is_in_heap_region(p);
}
/* ------------------------------------------------------
Introspection members
@ -174,21 +180,6 @@ static boolean_t intro_zone_locked(malloc_zone_t* zone) {
At process start, override the default allocator
------------------------------------------------------ */
static malloc_zone_t* mi_get_default_zone()
{
// The first returned zone is the real default
malloc_zone_t** zones = NULL;
unsigned count = 0;
kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count);
if (ret == KERN_SUCCESS && count > 0) {
return zones[0];
}
else {
// fallback
return malloc_default_zone();
}
}
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#endif
@ -220,33 +211,197 @@ static malloc_zone_t mi_malloc_zone = {
.batch_malloc = &zone_batch_malloc,
.batch_free = &zone_batch_free,
.introspect = &mi_introspect,
#if defined(MAC_OS_X_VERSION_10_6) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
// switch to version 9 on OSX 10.6 to support memalign.
.version = 9,
#if defined(MAC_OS_X_VERSION_10_6) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
// switch to version 9+ on OSX 10.6 to support memalign.
.memalign = &zone_memalign,
.free_definite_size = &zone_free_definite_size,
.pressure_relief = &zone_pressure_relief,
#if defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7
.claimed_address = &zone_claimed_address,
.version = 10
#else
.version = 4,
.version = 9
#endif
#else
.version = 4
#endif
};
#ifdef __cplusplus
}
#endif
#if defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE)
static malloc_zone_t *mi_malloc_default_zone(void) {
#if defined(MI_OSX_INTERPOSE)
// ------------------------------------------------------
// Override malloc_xxx and zone_xxx api's to use only
// our mimalloc zone. Since even the loader uses malloc
// on macOS, this ensures that all allocations go through
// mimalloc (as all calls are interposed).
// ------------------------------------------------------
static inline malloc_zone_t* mi_get_default_zone(void)
{
static bool init;
if (mi_unlikely(!init)) {
init = true;
malloc_zone_register(&mi_malloc_zone); // by calling register we avoid a zone error on free (see <http://eatmyrandom.blogspot.com/2010/03/mallocfree-interception-on-mac-os-x.html>)
}
return &mi_malloc_zone;
}
// TODO: should use the macros in alloc-override but they aren't available here.
__attribute__((used)) static struct {
mi_decl_externc int malloc_jumpstart(uintptr_t cookie);
mi_decl_externc void _malloc_fork_prepare(void);
mi_decl_externc void _malloc_fork_parent(void);
mi_decl_externc void _malloc_fork_child(void);
static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) {
UNUSED(size); UNUSED(flags);
return mi_get_default_zone();
}
static malloc_zone_t* mi_malloc_default_zone (void) {
return mi_get_default_zone();
}
static malloc_zone_t* mi_malloc_default_purgeable_zone(void) {
return mi_get_default_zone();
}
static void mi_malloc_destroy_zone(malloc_zone_t* zone) {
UNUSED(zone);
// nothing.
}
static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, vm_address_t** addresses, unsigned* count) {
UNUSED(task); UNUSED(mr);
if (addresses != NULL) *addresses = NULL;
if (count != NULL) *count = 0;
return KERN_SUCCESS;
}
static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) {
return (zone == NULL ? mi_malloc_zone.zone_name : zone->zone_name);
}
static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) {
UNUSED(zone); UNUSED(name);
}
static int mi_malloc_jumpstart(uintptr_t cookie) {
UNUSED(cookie);
return 1; // or 0 for no error?
}
static void mi__malloc_fork_prepare(void) {
// nothing
}
static void mi__malloc_fork_parent(void) {
// nothing
}
static void mi__malloc_fork_child(void) {
// nothing
}
static void mi_malloc_printf(const char* fmt, ...) {
UNUSED(fmt);
}
static bool zone_check(malloc_zone_t* zone) {
UNUSED(zone);
return true;
}
static malloc_zone_t* zone_from_ptr(const void* p) {
UNUSED(p);
return mi_get_default_zone();
}
static void zone_log(malloc_zone_t* zone, void* p) {
UNUSED(zone); UNUSED(p);
}
static void zone_print(malloc_zone_t* zone, bool b) {
UNUSED(zone); UNUSED(b);
}
static void zone_print_ptr_info(void* p) {
UNUSED(p);
}
static void zone_register(malloc_zone_t* zone) {
UNUSED(zone);
}
static void zone_unregister(malloc_zone_t* zone) {
UNUSED(zone);
}
// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
struct mi_interpose_s {
const void* replacement;
const void* target;
} replace_malloc_default_zone[] __attribute__((section("__DATA, __interpose"))) = {
{ (const void*)mi_malloc_default_zone, (const void*)malloc_default_zone },
};
#endif
#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun }
#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun)
#define MI_INTERPOSE_ZONE(fun) MI_INTERPOSE_FUN(malloc_##fun,fun)
__attribute__((used)) static const struct mi_interpose_s _mi_zone_interposes[] __attribute__((section("__DATA, __interpose"))) =
{
MI_INTERPOSE_MI(malloc_create_zone),
MI_INTERPOSE_MI(malloc_default_purgeable_zone),
MI_INTERPOSE_MI(malloc_default_zone),
MI_INTERPOSE_MI(malloc_destroy_zone),
MI_INTERPOSE_MI(malloc_get_all_zones),
MI_INTERPOSE_MI(malloc_get_zone_name),
MI_INTERPOSE_MI(malloc_jumpstart),
MI_INTERPOSE_MI(malloc_printf),
MI_INTERPOSE_MI(malloc_set_zone_name),
MI_INTERPOSE_MI(_malloc_fork_child),
MI_INTERPOSE_MI(_malloc_fork_parent),
MI_INTERPOSE_MI(_malloc_fork_prepare),
MI_INTERPOSE_ZONE(zone_batch_free),
MI_INTERPOSE_ZONE(zone_batch_malloc),
MI_INTERPOSE_ZONE(zone_calloc),
MI_INTERPOSE_ZONE(zone_check),
MI_INTERPOSE_ZONE(zone_free),
MI_INTERPOSE_ZONE(zone_from_ptr),
MI_INTERPOSE_ZONE(zone_log),
MI_INTERPOSE_ZONE(zone_malloc),
MI_INTERPOSE_ZONE(zone_memalign),
MI_INTERPOSE_ZONE(zone_print),
MI_INTERPOSE_ZONE(zone_print_ptr_info),
MI_INTERPOSE_ZONE(zone_realloc),
MI_INTERPOSE_ZONE(zone_register),
MI_INTERPOSE_ZONE(zone_unregister),
MI_INTERPOSE_ZONE(zone_valloc)
};
#else
// ------------------------------------------------------
// hook into the zone api's without interposing
// ------------------------------------------------------
static inline malloc_zone_t* mi_get_default_zone(void)
{
// The first returned zone is the real default
malloc_zone_t** zones = NULL;
unsigned count = 0;
kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count);
if (ret == KERN_SUCCESS && count > 0) {
return zones[0];
}
else {
// fallback
return malloc_default_zone();
}
}
#if defined(__clang__)
__attribute__((constructor(0)))
@ -287,5 +442,6 @@ static void _mi_macos_override_malloc() {
}
}
#endif // MI_OSX_INTERPOSE
#endif // MI_MALLOC_OVERRIDE

View file

@ -13,7 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file
#error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)"
#endif
#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32)) // || (defined(__APPLE__) && !defined(MI_OSX_INTERPOSE)))
#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32))
// ------------------------------------------------------
// Override system malloc
@ -42,6 +42,11 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE)
#include <malloc/malloc.h>
mi_decl_externc void vfree(void* p);
mi_decl_externc size_t malloc_size(const void* p);
mi_decl_externc size_t malloc_good_size(size_t size);
// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
struct mi_interpose_s {
@ -50,6 +55,7 @@ terms of the MIT license. A copy of the license can be found in the file
};
#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun }
#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun)
__attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) =
{
MI_INTERPOSE_MI(malloc),
@ -61,15 +67,18 @@ terms of the MIT license. A copy of the license can be found in the file
MI_INTERPOSE_MI(posix_memalign),
MI_INTERPOSE_MI(reallocf),
MI_INTERPOSE_MI(valloc),
MI_INTERPOSE_MI(malloc_size),
MI_INTERPOSE_MI(malloc_good_size),
MI_INTERPOSE_MI(aligned_alloc),
#ifndef MI_OSX_ZONE
// some code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
// sometimes code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us
MI_INTERPOSE_FUN(vfree,mi_cfree),
#else
// We interpose malloc_default_zone in alloc-override-osx.c
// we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely
MI_INTERPOSE_MI(free),
MI_INTERPOSE_FUN(vfree,mi_free),
#endif
// some code allocates from a zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us
};
#elif defined(_MSC_VER)
// cannot override malloc unless using a dll.
@ -123,7 +132,7 @@ terms of the MIT license. A copy of the license can be found in the file
void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
#endif
#elif (defined(__GNUC__) || defined(__clang__))
#elif (defined(__GNUC__) || defined(__clang__)) && !defined(MI_OSX_ZONE)
// ------------------------------------------------------
// Override by defining the mangled C++ names of the operators (as
// used by GCC and CLang).

View file

@ -33,13 +33,19 @@ terms of the MIT license. A copy of the license can be found in the file
size_t mi_malloc_size(const void* p) mi_attr_noexcept {
//if (!mi_is_in_heap_region(p)) return 0;
return mi_usable_size(p);
}
size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept {
//if (!mi_is_in_heap_region(p)) return 0;
return mi_usable_size(p);
}
size_t mi_malloc_good_size(size_t size) mi_attr_noexcept {
return mi_good_size(size);
}
void mi_cfree(void* p) mi_attr_noexcept {
if (mi_is_in_heap_region(p)) {
mi_free(p);

View file

@ -485,7 +485,7 @@ void mi_free(void* p) mi_attr_noexcept
const uintptr_t tid = _mi_thread_id();
mi_page_t* const page = _mi_segment_page_of(segment, p);
if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
if (mi_likely(tid == mi_atomic_load_relaxed(&segment->thread_id) && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
// local, and not full or aligned
mi_block_t* block = (mi_block_t*)(p);
if (mi_unlikely(mi_check_is_double_free(page,block))) return;

View file

@ -506,10 +506,11 @@ static void mi_detect_cpu_features(void) {
void mi_process_init(void) mi_attr_noexcept {
// ensure we are called once
if (_mi_process_is_initialized) return;
_mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
_mi_process_is_initialized = true;
mi_process_setup_auto_thread_done();
_mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
mi_detect_cpu_features();
_mi_os_init();
mi_heap_main_init();