diff --git a/CMakeLists.txt b/CMakeLists.txt index e78ba9fd..4729e5b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,8 +31,10 @@ option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF) option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc." OFF) option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version" OFF) option(MI_NO_THP "Disable transparent huge pages support on Linux/Android for the mimalloc process only" OFF) +option(MI_EXTRA_CPPDEFS "Extra pre-processor definitions (use as `-DMI_EXTRA_CPPDEFS=\"opt1=val1;opt2=val2\"`)" "") # deprecated options +option(MI_WIN_USE_FLS "Use Fiber local storage on Windows to detect thread termination" OFF) option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) option(MI_USE_LIBATOMIC "Explicitly link with -latomic (on older systems) (deprecated and detected automatically)" OFF) @@ -62,9 +64,14 @@ set(mi_sources set(mi_cflags "") set(mi_cflags_static "") # extra flags for a static library build set(mi_cflags_dynamic "") # extra flags for a shared-object library build -set(mi_defines "") set(mi_libraries "") +if(MI_EXTRA_CPPDEFS) + set(mi_defines ${MI_EXTRA_CPPDEFS}) +else() + set(mi_defines "") +endif() + # ----------------------------------------------------------------------------- # Convenience: set default build type depending on the build directory # ----------------------------------------------------------------------------- @@ -307,6 +314,22 @@ if(MI_LIBC_MUSL) list(APPEND mi_defines MI_LIBC_MUSL=1) endif() +if(MI_WIN_USE_FLS) + message(STATUS "Use the Fiber API to detect thread termination") + list(APPEND mi_defines MI_WIN_USE_FLS=1) +endif() + + + # Check /proc/cpuinfo for an SV39 MMU and define a constant if one is + # found. We will want to skip the aligned hinting in that case. 
Issue #939, #949 + if (EXISTS /proc/cpuinfo) + file(STRINGS /proc/cpuinfo mi_sv39_mmu REGEX "^mmu[ \t]+:[ \t]+sv39$") + if (mi_sv39_mmu) + MESSAGE( STATUS "Disable aligned hints (SV39 MMU detected)" ) + list(APPEND mi_defines MI_NO_ALIGNED_HINT=1) + endif() + endif() + # On Haiku use `-DCMAKE_INSTALL_PREFIX` instead, issue #788 # if(CMAKE_SYSTEM_NAME MATCHES "Haiku") # SET(CMAKE_INSTALL_LIBDIR ~/config/non-packaged/lib) diff --git a/bin/mimalloc-redirect.dll b/bin/mimalloc-redirect.dll index a3a3591f..ed001d64 100644 Binary files a/bin/mimalloc-redirect.dll and b/bin/mimalloc-redirect.dll differ diff --git a/bin/mimalloc-redirect.lib b/bin/mimalloc-redirect.lib index de128bb9..7d5f19ce 100644 Binary files a/bin/mimalloc-redirect.lib and b/bin/mimalloc-redirect.lib differ diff --git a/bin/mimalloc-redirect32.dll b/bin/mimalloc-redirect32.dll index 522723e5..ec4ff1d5 100644 Binary files a/bin/mimalloc-redirect32.dll and b/bin/mimalloc-redirect32.dll differ diff --git a/bin/mimalloc-redirect32.lib b/bin/mimalloc-redirect32.lib index 87f19b8e..a7100afc 100644 Binary files a/bin/mimalloc-redirect32.lib and b/bin/mimalloc-redirect32.lib differ diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index b4e74789..dcbaf15d 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -77,6 +77,11 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c extern mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_cache_align const mi_page_t _mi_page_empty; +void _mi_process_load(void); +void mi_cdecl _mi_process_done(void); +bool _mi_is_redirected(void); +bool _mi_allocator_init(const char** message); +void _mi_allocator_done(void); bool _mi_is_main_thread(void); size_t _mi_current_thread_count(void); bool _mi_preloading(void); // true while the C runtime is not initialized yet @@ -704,6 +709,16 @@ static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const return mi_rotl(x ^ keys[1], keys[0]) + keys[0]; } +static inline uint32_t mi_ptr_encode_canary(const void* null, const void* p, const uintptr_t* keys) { + const uint32_t x = (uint32_t)(mi_ptr_encode(null,p,keys)); + // make the lowest byte 0 to prevent spurious read overflows which could be a security issue (issue #951) + #ifdef MI_BIG_ENDIAN + return (x & 0x00FFFFFF); + #else + return (x & 0xFFFFFF00); + #endif +} + static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) { mi_track_mem_defined(block,sizeof(mi_block_t)); mi_block_t* next; diff --git a/src/alloc-override.c b/src/alloc-override.c index 12837cdd..82a98b80 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -289,8 +289,8 @@ mi_decl_weak int reallocarr(void* p, size_t count, size_t size) { return mi_r void __libc_free(void* p) MI_FORWARD0(mi_free, p) void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); } -#elif defined(__GLIBC__) && defined(__linux__) - // forward __libc interface (needed for glibc-based Linux distributions) +#elif defined(__linux__) + // forward __libc interface (needed for glibc-based and musl-based Linux distributions) void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size) void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size) void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size) diff --git a/src/alloc.c b/src/alloc.c index 99cb1066..70767e5b 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -99,7 +99,7 @@ extern inline void* 
_mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_ mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); #endif mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess - padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); + padding->canary = mi_ptr_encode_canary(page,block,page->keys); padding->delta = (uint32_t)(delta); #if MI_PADDING_CHECK if (!mi_page_is_huge(page)) { diff --git a/src/arena-abandon.c b/src/arena-abandon.c index eaa8c7c9..48e37794 100644 --- a/src/arena-abandon.c +++ b/src/arena-abandon.c @@ -148,7 +148,7 @@ static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) { void _mi_arena_segment_mark_abandoned(mi_segment_t* segment) { mi_assert_internal(segment->used == segment->abandoned); - mi_atomic_store_release(&segment->thread_id, 0); // mark as abandoned for multi-thread free's + mi_atomic_store_release(&segment->thread_id, (uintptr_t)0); // mark as abandoned for multi-thread free's if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) { mi_arena_segment_os_mark_abandoned(segment); return; @@ -237,7 +237,7 @@ static mi_segment_t* mi_arena_segment_clear_abandoned_at(mi_arena_t* arena, mi_s static mi_segment_t* mi_arena_segment_clear_abandoned_next_field(mi_arena_field_cursor_t* previous) { const size_t max_arena = mi_arena_get_count(); size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx); - size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx) + 1; + size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx); // visit arena's (from the previous cursor) for (; previous->start < previous->end; previous->start++, field_idx = 0, bit_idx = 0) { // index wraps around @@ -266,11 +266,12 @@ static mi_segment_t* mi_arena_segment_clear_abandoned_next_field(mi_arena_field_ // pre-check if the bit is set size_t mask = ((size_t)1 << bit_idx); if mi_unlikely((field & mask) == mask) { - previous->bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx); - mi_segment_t* const segment = mi_arena_segment_clear_abandoned_at(arena, previous->subproc, previous->bitmap_idx); + mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx); + mi_segment_t* const segment = mi_arena_segment_clear_abandoned_at(arena, previous->subproc, bitmap_idx); if (segment != NULL) { //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); } + previous->bitmap_idx = mi_bitmap_index_create_ex(field_idx, bit_idx + 1); // start at next one for the next iteration return segment; } } diff --git a/src/bitmap.h b/src/bitmap.h index d8316b83..367da739 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -35,9 +35,13 @@ typedef mi_bitmap_field_t* mi_bitmap_t; typedef size_t mi_bitmap_index_t; // Create a bit index. +static inline mi_bitmap_index_t mi_bitmap_index_create_ex(size_t idx, size_t bitidx) { + mi_assert_internal(bitidx <= MI_BITMAP_FIELD_BITS); + return (idx*MI_BITMAP_FIELD_BITS) + bitidx; +} static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) { mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS); - return (idx*MI_BITMAP_FIELD_BITS) + bitidx; + return mi_bitmap_index_create_ex(idx,bitidx); } // Create a bit index. 
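Note (illustration only, not part of the patch): the arena-abandon.c change above stores the cursor as the bit *after* the segment it just returned, which can be one past the last bit of a bitmap field; the relaxed assertion in mi_bitmap_index_create_ex allows exactly that, and the div/mod decomposition on resume naturally wraps it to bit 0 of the next field. A minimal standalone sketch of how such a cursor wraps, using hypothetical, simplified names rather than mimalloc's internals:

    #include <stddef.h>
    #include <stdio.h>

    #define FIELD_BITS (8*sizeof(size_t))
    typedef size_t bitmap_index_t;                 // field_idx*FIELD_BITS + bit_idx

    static bitmap_index_t index_create_ex(size_t field_idx, size_t bit_idx) {
      // unlike the strict variant, bit_idx may equal FIELD_BITS ("start of the next field")
      return field_idx*FIELD_BITS + bit_idx;
    }

    int main(void) {
      // suppose the segment at field 3, last bit was just returned; remember where to resume
      bitmap_index_t cursor = index_create_ex(3, (FIELD_BITS-1) + 1);
      // resuming decomposes the cursor again: it lands on field 4, bit 0
      printf("resume at field %zu, bit %zu\n", cursor / FIELD_BITS, cursor % FIELD_BITS);
      return 0;
    }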
diff --git a/src/free.c b/src/free.c index e2eec3ca..ad162915 100644 --- a/src/free.c +++ b/src/free.c @@ -244,11 +244,12 @@ static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block) { // first see if the segment was abandoned and if we can reclaim it into our thread - if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) && + if (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 && #if MI_HUGE_PAGE_ABANDON segment->page_kind != MI_PAGE_HUGE && #endif - mi_atomic_load_relaxed(&segment->thread_id) == 0) + mi_atomic_load_relaxed(&segment->thread_id) == 0 && // segment is abandoned? + mi_prim_get_default_heap() != (mi_heap_t*)&_mi_heap_empty) // and we did not already exit this thread (without this check, a fresh heap will be initialized (issue #944)) { // the segment is abandoned, try to reclaim it into our heap if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) { @@ -421,7 +422,7 @@ static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* bloc uintptr_t keys[2]; keys[0] = page->keys[0]; keys[1] = page->keys[1]; - bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize); + bool ok = (mi_ptr_encode_canary(page,block,keys) == canary && *delta <= *bsize); mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); return ok; } diff --git a/src/heap.c b/src/heap.c index b46e7ec3..b3fda0f6 100644 --- a/src/heap.c +++ b/src/heap.c @@ -555,13 +555,14 @@ void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) { static void mi_get_fast_divisor(size_t divisor, uint64_t* magic, size_t* shift) { mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX); - *shift = 64 - mi_clz(divisor - 1); + *shift = MI_INTPTR_BITS - mi_clz(divisor - 1); *magic = ((((uint64_t)1 << 32) * (((uint64_t)1 << *shift) - divisor)) / divisor + 1); } static size_t mi_fast_divide(size_t n, uint64_t magic, size_t shift) { mi_assert_internal(n <= UINT32_MAX); - return ((((uint64_t)n * magic) >> 32) + n) >> shift; + const uint64_t hi = ((uint64_t)n * magic) >> 32; + return (size_t)((hi + n) >> shift); } bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg) { diff --git a/src/init.c b/src/init.c index 0f3142b5..a13edba6 100644 --- a/src/init.c +++ b/src/init.c @@ -533,54 +533,15 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { // -------------------------------------------------------- // Run functions on process init/done, and thread init/done // -------------------------------------------------------- -static void mi_cdecl mi_process_done(void); - static bool os_preloading = true; // true until this module is initialized -static bool mi_redirected = false; // true if malloc redirects to mi_malloc // Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. 
bool mi_decl_noinline _mi_preloading(void) { return os_preloading; } -mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { - return mi_redirected; -} - -// Communicate with the redirection module on Windows -#if defined(_WIN32) && defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT) -#ifdef __cplusplus -extern "C" { -#endif -mi_decl_export void _mi_redirect_entry(DWORD reason) { - // called on redirection; careful as this may be called before DllMain - if (reason == DLL_PROCESS_ATTACH) { - mi_redirected = true; - } - else if (reason == DLL_PROCESS_DETACH) { - mi_redirected = false; - } - else if (reason == DLL_THREAD_DETACH) { - mi_thread_done(); - } -} -__declspec(dllimport) bool mi_cdecl mi_allocator_init(const char** message); -__declspec(dllimport) void mi_cdecl mi_allocator_done(void); -#ifdef __cplusplus -} -#endif -#else -static bool mi_allocator_init(const char** message) { - if (message != NULL) *message = NULL; - return true; -} -static void mi_allocator_done(void) { - // nothing to do -} -#endif - -// Called once by the process loader -static void mi_process_load(void) { +// Called once by the process loader from `src/prim/prim.c` +void _mi_process_load(void) { mi_heap_main_init(); #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; @@ -588,17 +549,14 @@ static void mi_process_load(void) { #endif os_preloading = false; mi_assert_internal(_mi_is_main_thread()); - #if !(defined(_WIN32) && defined(MI_SHARED_LIB)) // use Dll process detach (see below) instead of atexit (issue #521) - atexit(&mi_process_done); - #endif _mi_options_init(); mi_process_setup_auto_thread_done(); mi_process_init(); - if (mi_redirected) _mi_verbose_message("malloc is redirected.\n"); + if (_mi_is_redirected()) _mi_verbose_message("malloc is redirected.\n"); // show message from the redirector (if present) const char* msg = NULL; - mi_allocator_init(&msg); + _mi_allocator_init(&msg); if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { _mi_fputs(NULL,NULL,NULL,msg); } @@ -676,7 +634,7 @@ void mi_process_init(void) mi_attr_noexcept { } // Called when the process is done (through `at_exit`) -static void mi_cdecl mi_process_done(void) { +void mi_cdecl _mi_process_done(void) { // only shutdown if we were initialized if (!_mi_process_is_initialized) return; // ensure we are called once @@ -708,64 +666,8 @@ static void mi_cdecl mi_process_done(void) { if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { mi_stats_print(NULL); } - mi_allocator_done(); + _mi_allocator_done(); _mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id); os_preloading = true; // don't call the C runtime anymore } - - -#if defined(_WIN32) && defined(MI_SHARED_LIB) - // Windows DLL: easy to hook into process_init and thread_done - __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { - MI_UNUSED(reserved); - MI_UNUSED(inst); - if (reason==DLL_PROCESS_ATTACH) { - mi_process_load(); - } - else if (reason==DLL_PROCESS_DETACH) { - mi_process_done(); - } - else if (reason==DLL_THREAD_DETACH) { - if (!mi_is_redirected()) { - mi_thread_done(); - } - } - return TRUE; - } - -#elif defined(_MSC_VER) - // MSVC: use data section magic for static libraries - // See - static int _mi_process_init(void) { - mi_process_load(); - return 0; - } - typedef 
int(*_mi_crt_callback_t)(void); - #if defined(_M_X64) || defined(_M_ARM64) - __pragma(comment(linker, "/include:" "_mi_msvc_initu")) - #pragma section(".CRT$XIU", long, read) - #else - __pragma(comment(linker, "/include:" "__mi_msvc_initu")) - #endif - #pragma data_seg(".CRT$XIU") - mi_decl_externc _mi_crt_callback_t _mi_msvc_initu[] = { &_mi_process_init }; - #pragma data_seg() - -#elif defined(__cplusplus) - // C++: use static initialization to detect process start - static bool _mi_process_init(void) { - mi_process_load(); - return (_mi_heap_main.thread_id != 0); - } - static bool mi_initialized = _mi_process_init(); - -#elif defined(__GNUC__) || defined(__clang__) - // GCC,Clang: use the constructor attribute - static void __attribute__((constructor)) _mi_process_init(void) { - mi_process_load(); - } - -#else -#pragma message("define a way to call mi_process_load on your platform") -#endif diff --git a/src/libc.c b/src/libc.c index dd6b4007..ce541f1b 100644 --- a/src/libc.c +++ b/src/libc.c @@ -130,7 +130,7 @@ static void mi_out_alignright(char fill, char* start, size_t len, size_t extra, } -static void mi_out_num(uintptr_t x, size_t base, char prefix, char** out, char* end) +static void mi_out_num(uintmax_t x, size_t base, char prefix, char** out, char* end) { if (x == 0 || base == 0 || base > 16) { if (prefix != 0) { mi_outc(prefix, out, end); } @@ -206,12 +206,13 @@ void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) { } else if (c == 'p' || c == 'x' || c == 'u') { // unsigned - uintptr_t x = 0; + uintmax_t x = 0; if (c == 'x' || c == 'u') { if (numtype == 'z') x = va_arg(args, size_t); else if (numtype == 't') x = va_arg(args, uintptr_t); // unsigned ptrdiff_t - else if (numtype == 'L') x = (uintptr_t)va_arg(args, unsigned long long); - else x = va_arg(args, unsigned long); + else if (numtype == 'L') x = va_arg(args, unsigned long long); + else if (numtype == 'l') x = va_arg(args, unsigned long); + else x = va_arg(args, unsigned int); } else if (c == 'p') { x = va_arg(args, uintptr_t); @@ -228,20 +229,21 @@ void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) { } else if (c == 'i' || c == 'd') { // signed - intptr_t x = 0; + intmax_t x = 0; if (numtype == 'z') x = va_arg(args, intptr_t ); else if (numtype == 't') x = va_arg(args, ptrdiff_t); - else if (numtype == 'L') x = (intptr_t)va_arg(args, long long); - else x = va_arg(args, long); + else if (numtype == 'L') x = va_arg(args, long long); + else if (numtype == 'l') x = va_arg(args, long); + else x = va_arg(args, int); char pre = 0; if (x < 0) { pre = '-'; - if (x > INTPTR_MIN) { x = -x; } + if (x > INTMAX_MIN) { x = -x; } } else if (numplus != 0) { pre = numplus; } - mi_out_num((uintptr_t)x, 10, pre, &out, end); + mi_out_num((uintmax_t)x, 10, pre, &out, end); } else if (c >= ' ' && c <= '~') { // unknown format diff --git a/src/options.c b/src/options.c index c97b9abe..c55e63b1 100644 --- a/src/options.c +++ b/src/options.c @@ -47,6 +47,47 @@ typedef struct mi_option_desc_s { #define MI_OPTION(opt) mi_option_##opt, #opt, NULL #define MI_OPTION_LEGACY(opt,legacy) mi_option_##opt, #opt, #legacy +// Some options can be set at build time for statically linked libraries (use `-DMI_EXTRA_CPPDEFS="opt1=val1;opt2=val2"`) +// This is useful if we cannot pass them as environment variables +// (and setting them programmatically would be too late) + +#ifndef MI_DEFAULT_VERBOSE +#define MI_DEFAULT_VERBOSE 0 +#endif + +#ifndef MI_DEFAULT_EAGER_COMMIT +#define MI_DEFAULT_EAGER_COMMIT 1 +#endif + 
+#ifndef MI_DEFAULT_ARENA_EAGER_COMMIT +#define MI_DEFAULT_ARENA_EAGER_COMMIT 2 +#endif + +#ifndef MI_DEFAULT_ARENA_RESERVE + #if (MI_INTPTR_SIZE>4) + #define MI_DEFAULT_ARENA_RESERVE 1024L*1024L + #else + #define MI_DEFAULT_ARENA_RESERVE 128L*1024L + #endif +#endif + +#ifndef MI_DEFAULT_DISALLOW_ARENA_ALLOC +#define MI_DEFAULT_DISALLOW_ARENA_ALLOC 0 +#endif + +#ifndef MI_DEFAULT_ALLOW_LARGE_OS_PAGES +#define MI_DEFAULT_ALLOW_LARGE_OS_PAGES 0 +#endif + +#ifndef MI_DEFAULT_RESERVE_HUGE_OS_PAGES +#define MI_DEFAULT_RESERVE_HUGE_OS_PAGES 0 +#endif + +#ifndef MI_DEFAULT_RESERVE_OS_MEMORY +#define MI_DEFAULT_RESERVE_OS_MEMORY 0 +#endif + + static mi_option_desc_t options[_mi_option_last] = { // stable options @@ -56,16 +97,16 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(show_errors) }, #endif { 0, UNINIT, MI_OPTION(show_stats) }, - { 0, UNINIT, MI_OPTION(verbose) }, + { MI_DEFAULT_VERBOSE, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) - { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. linux) + { MI_DEFAULT_EAGER_COMMIT, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) + { MI_DEFAULT_ARENA_EAGER_COMMIT, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. linux) { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) (note: on linux this uses MADV_DONTNEED for decommit) - { 0, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's - { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages + { MI_DEFAULT_ALLOW_LARGE_OS_PAGES, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's + { MI_DEFAULT_RESERVE_HUGE_OS_PAGES, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages {-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N - { 0, UNINIT, MI_OPTION(reserve_os_memory) }, // reserve N KiB OS memory in advance (use `option_get_size`) + { MI_DEFAULT_RESERVE_OS_MEMORY, UNINIT, MI_OPTION(reserve_os_memory) }, // reserve N KiB OS memory in advance (use `option_get_size`) { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge,abandoned_page_reset) }, // reset free page memory when a thread terminates @@ -83,15 +124,11 @@ static mi_option_desc_t options[_mi_option_last] = { 32, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output { 10, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. percentage of the abandoned segments to be reclaimed per try. { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! 
- #if (MI_INTPTR_SIZE>4) - { 1024L*1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time (=1GiB) (use `option_get_size`) - #else - { 128L*1024L, UNINIT, MI_OPTION(arena_reserve) }, // =128MiB on 32-bit - #endif - { 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's + { MI_DEFAULT_ARENA_RESERVE, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time (=1GiB) (use `option_get_size`) + { 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, { 1, UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free - { 0, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's) + { MI_DEFAULT_DISALLOW_ARENA_ALLOC, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's) { 400, UNINIT, MI_OPTION(retry_on_oom) }, // windows only: retry on out-of-memory for N milliseconds (=400), set to 0 to disable retries. #if defined(MI_VISIT_ABANDONED) { 1, INITIALIZED, MI_OPTION(visit_abandoned) }, // allow visiting heap blocks in abandoned segments; requires taking locks during reclaim. diff --git a/src/os.c b/src/os.c index 931ebc2b..b794b4da 100644 --- a/src/os.c +++ b/src/os.c @@ -77,8 +77,9 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats -------------------------------------------------------------- */ // On 64-bit systems, we can do efficient aligned allocation by using -// the 2TiB to 30TiB area to allocate those. -#if (MI_INTPTR_SIZE >= 8) +// the 2TiB to 30TiB area to allocate those. We assume we have +// at least 48 bits of virtual address space on 64-bit systems (but see issue #939) +#if (MI_INTPTR_SIZE >= 8) && !defined(MI_NO_ALIGNED_HINT) static mi_decl_cache_align _Atomic(uintptr_t)aligned_base; // Return a MI_SEGMENT_SIZE aligned address that is probably available. @@ -224,7 +225,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; size = _mi_align_up(size, _mi_os_page_size()); - // try first with a hint (this will be aligned directly on Win 10+ or BSD) + // try first with a requested alignment hint (this will usually be aligned directly on Win 10+ or BSD) void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats); if (p == NULL) return NULL; diff --git a/src/prim/osx/alloc-override-zone.c b/src/prim/osx/alloc-override-zone.c index 1515b886..d3af170d 100644 --- a/src/prim/osx/alloc-override-zone.c +++ b/src/prim/osx/alloc-override-zone.c @@ -418,9 +418,9 @@ static inline malloc_zone_t* mi_get_default_zone(void) } #if defined(__clang__) -__attribute__((constructor(0))) +__attribute__((constructor(101))) // highest priority #else -__attribute__((constructor)) // seems not supported by g++-11 on the M1 +__attribute__((constructor)) // priority level is not supported by gcc #endif __attribute__((used)) static void _mi_macos_override_malloc(void) { diff --git a/src/prim/prim.c b/src/prim/prim.c index 3b7d3736..2002853f 100644 --- a/src/prim/prim.c +++ b/src/prim/prim.c @@ -25,3 +25,52 @@ terms of the MIT license. A copy of the license can be found in the file #include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumos, Haiku, DragonFly, etc.) 
#endif + +// Generic process initialization +#ifndef MI_PRIM_HAS_PROCESS_ATTACH +#if defined(__GNUC__) || defined(__clang__) + // gcc,clang: use the constructor/destructor attribute + // which for both seem to run before regular constructors/destructors + #if defined(__clang__) + #define mi_attr_constructor __attribute__((constructor(101))) + #define mi_attr_destructor __attribute__((destructor(101))) + #else + #define mi_attr_constructor __attribute__((constructor)) + #define mi_attr_destructor __attribute__((destructor)) + #endif + static void mi_attr_constructor mi_process_attach(void) { + _mi_process_load(); + } + static void mi_attr_destructor mi_process_detach(void) { + _mi_process_done(); + } +#elif defined(__cplusplus) + // C++: use static initialization to detect process start/end + // This is not guaranteed to be first/last but the best we can generally do? + struct mi_init_done_t { + mi_init_done_t() { + _mi_process_load(); + } + ~mi_init_done_t() { + _mi_process_done(); + } + }; + static mi_init_done_t mi_init_done; + #else + #pragma message("define a way to call _mi_process_load/done on your platform") +#endif +#endif + +// Generic allocator init/done callback +#ifndef MI_PRIM_HAS_ALLOCATOR_INIT +bool _mi_is_redirected(void) { + return false; +} +bool _mi_allocator_init(const char** message) { + if (message != NULL) { *message = NULL; } + return true; +} +void _mi_allocator_done(void) { + // nothing to do +} +#endif diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index bd874f9b..385354fc 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -499,8 +499,7 @@ void _mi_prim_process_info(mi_process_info_t* pinfo) } // get process info - PROCESS_MEMORY_COUNTERS info; - memset(&info, 0, sizeof(info)); + PROCESS_MEMORY_COUNTERS info; _mi_memzero_var(info); if (pGetProcessMemoryInfo != NULL) { pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); } @@ -602,60 +601,195 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { #endif // MI_USE_RTLGENRANDOM + + //---------------------------------------------------------------- -// Thread init/done +// Process & Thread Init/Done //---------------------------------------------------------------- -#if !defined(MI_SHARED_LIB) - -// use thread local storage keys to detect thread ending -// note: another design could be to use special linker sections (see issue #869) -#include -#if (_WIN32_WINNT < 0x600) // before Windows Vista -WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); -WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); -WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); -WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); -#endif - -static DWORD mi_fls_key = (DWORD)(-1); - -static void NTAPI mi_fls_done(PVOID value) { - mi_heap_t* heap = (mi_heap_t*)value; - if (heap != NULL) { - _mi_thread_done(heap); - FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672 +static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { + MI_UNUSED(reserved); + MI_UNUSED(module); + if (reason==DLL_PROCESS_ATTACH) { + _mi_process_load(); } + else if (reason==DLL_PROCESS_DETACH) { + _mi_process_done(); + } + else if (reason==DLL_THREAD_DETACH && !_mi_is_redirected()) { + _mi_thread_done(NULL); + } } -void _mi_prim_thread_init_auto_done(void) { - mi_fls_key = FlsAlloc(&mi_fls_done); -} -void _mi_prim_thread_done_auto_done(void) { - // call thread-done on all 
threads (except the main thread) to prevent - // dangling callback pointer if statically linked with a DLL; Issue #208 - FlsFree(mi_fls_key); -} +#if defined(MI_SHARED_LIB) + #define MI_PRIM_HAS_PROCESS_ATTACH 1 -void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { - mi_assert_internal(mi_fls_key != (DWORD)(-1)); - FlsSetValue(mi_fls_key, heap); -} + // Windows DLL: easy to hook into process_init and thread_done + __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { + mi_win_main((PVOID)inst,reason,reserved); + return TRUE; + } -#else + // nothing to do since `_mi_thread_done` is handled through the DLL_THREAD_DETACH event. + void _mi_prim_thread_init_auto_done(void) { } + void _mi_prim_thread_done_auto_done(void) { } + void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); + } -// Dll; nothing to do as in that case thread_done is handled through the DLL_THREAD_DETACH event. +#elif !defined(MI_WIN_USE_FLS) + #define MI_PRIM_HAS_PROCESS_ATTACH 1 -void _mi_prim_thread_init_auto_done(void) { -} + static void NTAPI mi_win_main_attach(PVOID module, DWORD reason, LPVOID reserved) { + if (reason == DLL_PROCESS_ATTACH || reason == DLL_THREAD_ATTACH) { + mi_win_main(module, reason, reserved); + } + } + static void NTAPI mi_win_main_detach(PVOID module, DWORD reason, LPVOID reserved) { + if (reason == DLL_PROCESS_DETACH || reason == DLL_THREAD_DETACH) { + mi_win_main(module, reason, reserved); + } + } -void _mi_prim_thread_done_auto_done(void) { -} + // Set up TLS callbacks in a statically linked library by using special data sections. + // See + // We use 2 entries to ensure we call attach events before constructors + // are called, and detach events after destructors are called. + #if defined(__cplusplus) + extern "C" { + #endif -void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { - MI_UNUSED(heap); -} + #if defined(_WIN64) + #pragma comment(linker, "/INCLUDE:_tls_used") + #pragma comment(linker, "/INCLUDE:_mi_tls_callback_pre") + #pragma comment(linker, "/INCLUDE:_mi_tls_callback_post") + #pragma const_seg(".CRT$XLB") + extern const PIMAGE_TLS_CALLBACK _mi_tls_callback_pre[]; + const PIMAGE_TLS_CALLBACK _mi_tls_callback_pre[] = { &mi_win_main_attach }; + #pragma const_seg() + #pragma const_seg(".CRT$XLY") + extern const PIMAGE_TLS_CALLBACK _mi_tls_callback_post[]; + const PIMAGE_TLS_CALLBACK _mi_tls_callback_post[] = { &mi_win_main_detach }; + #pragma const_seg() + #else + #pragma comment(linker, "/INCLUDE:__tls_used") + #pragma comment(linker, "/INCLUDE:__mi_tls_callback_pre") + #pragma comment(linker, "/INCLUDE:__mi_tls_callback_post") + #pragma data_seg(".CRT$XLB") + PIMAGE_TLS_CALLBACK _mi_tls_callback_pre[] = { &mi_win_main_attach }; + #pragma data_seg() + #pragma data_seg(".CRT$XLY") + PIMAGE_TLS_CALLBACK _mi_tls_callback_post[] = { &mi_win_main_detach }; + #pragma data_seg() + #endif + #if defined(__cplusplus) + } + #endif + + // nothing to do since `_mi_thread_done` is handled through the DLL_THREAD_DETACH event. 
+ void _mi_prim_thread_init_auto_done(void) { } + void _mi_prim_thread_done_auto_done(void) { } + void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); + } + +#else // deprecated: statically linked, use fiber api + + #if defined(_MSC_VER) // on clang/gcc use the constructor attribute (in `src/prim/prim.c`) + // MSVC: use data section magic for static libraries + // See + #define MI_PRIM_HAS_PROCESS_ATTACH 1 + + static int mi_process_attach(void) { + mi_win_main(NULL,DLL_PROCESS_ATTACH,NULL); + atexit(&_mi_process_done); + return 0; + } + typedef int(*mi_crt_callback_t)(void); + #if defined(_WIN64) + #pragma comment(linker, "/INCLUDE:_mi_tls_callback") + #pragma section(".CRT$XIU", long, read) + #else + #pragma comment(linker, "/INCLUDE:__mi_tls_callback") + #endif + #pragma data_seg(".CRT$XIU") + mi_decl_externc mi_crt_callback_t _mi_tls_callback[] = { &mi_process_attach }; + #pragma data_seg() + #endif + + // use the fiber api for calling `_mi_thread_done`. + #include + #if (_WIN32_WINNT < 0x600) // before Windows Vista + WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); + WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); + WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); + WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); + #endif + + static DWORD mi_fls_key = (DWORD)(-1); + + static void NTAPI mi_fls_done(PVOID value) { + mi_heap_t* heap = (mi_heap_t*)value; + if (heap != NULL) { + _mi_thread_done(heap); + FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672 + } + } + + void _mi_prim_thread_init_auto_done(void) { + mi_fls_key = FlsAlloc(&mi_fls_done); + } + + void _mi_prim_thread_done_auto_done(void) { + // call thread-done on all threads (except the main thread) to prevent + // dangling callback pointer if statically linked with a DLL; Issue #208 + FlsFree(mi_fls_key); + } + + void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + mi_assert_internal(mi_fls_key != (DWORD)(-1)); + FlsSetValue(mi_fls_key, heap); + } #endif +// ---------------------------------------------------- +// Communicate with the redirection module on Windows +// ---------------------------------------------------- +#if defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT) + #define MI_PRIM_HAS_ALLOCATOR_INIT 1 + + static bool mi_redirected = false; // true if malloc redirects to mi_malloc + + bool _mi_is_redirected(void) { + return mi_redirected; + } + + #ifdef __cplusplus + extern "C" { + #endif + mi_decl_export void _mi_redirect_entry(DWORD reason) { + // called on redirection; careful as this may be called before DllMain + if (reason == DLL_PROCESS_ATTACH) { + mi_redirected = true; + } + else if (reason == DLL_PROCESS_DETACH) { + mi_redirected = false; + } + else if (reason == DLL_THREAD_DETACH) { + _mi_thread_done(NULL); + } + } + __declspec(dllimport) bool mi_cdecl mi_allocator_init(const char** message); + __declspec(dllimport) void mi_cdecl mi_allocator_done(void); + #ifdef __cplusplus + } + #endif + bool _mi_allocator_init(const char** message) { + return mi_allocator_init(message); + } + void _mi_allocator_done(void) { + mi_allocator_done(); + } +#endif \ No newline at end of file diff --git a/test/main-override-static.c b/test/main-override-static.c index 9757e9a9..07af1090 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -20,8 +20,12 @@ static void test_reserved(void); static void 
negative_stat(void); static void alloc_huge(void); static void test_heap_walk(void); static void test_heap_arena(void); static void test_align(void); +static void test_canary_leak(void); // static void test_large_pages(void); int main() { @@ -33,7 +37,8 @@ int main() { // double_free2(); // corrupt_free(); // block_overflow1(); - block_overflow2(); + // block_overflow2(); + test_canary_leak(); // test_aslr(); // invalid_free(); // test_reserved(); @@ -250,6 +255,13 @@ static void test_heap_arena(void) { break; } } +static void test_canary_leak(void) { + char* p = mi_mallocn_tp(char,23); + for(int i = 0; i < 23; i++) { + p[i] = '0'+i; + } + puts(p); + free(p); } // Experiment with huge OS pages diff --git a/test/main-override.cpp b/test/main-override.cpp index dcb896cc..5e8b6f82 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -40,6 +40,7 @@ static void bench_alloc_large(void); // issue #xxx //static void test_large_migrate(void); // issue #691 static void heap_thread_free_huge(); static void test_std_string(); // issue #697 +static void test_thread_local(); // issue #944 static void test_stl_allocators(); @@ -47,7 +48,8 @@ static void test_stl_allocators(); int main() { // mi_stats_reset(); // ignore earlier allocations - // test_std_string(); + //test_std_string(); + test_thread_local(); // heap_thread_free_huge(); /* heap_thread_free_huge(); @@ -398,3 +400,30 @@ static void bench_alloc_large(void) { std::cout << "Avg " << us_per_allocation << " us per allocation" << std::endl; } + +class MTest +{ + char *data; +public: + MTest() { data = (char*)malloc(1024); } + ~MTest() { free(data); }; +}; + +thread_local MTest tlVariable; + +void threadFun( int i ) +{ + printf( "Thread %d\n", i ); + std::this_thread::sleep_for( std::chrono::milliseconds(100) ); +} + +void test_thread_local() +{ + for( int i=1; i < 100; ++i ) + { + std::thread t( threadFun, i ); + t.join(); + mi_stats_print(NULL); + } + return; +}
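Note (illustration only, not part of the patch): the `mi_ptr_encode_canary` change exercised by `test_canary_leak` above keeps the canary byte adjacent to the user block at zero (the low byte on little-endian, the high byte on big-endian), so a string-style read overflow such as `puts` on a non-terminated buffer stops at the padding instead of leaking encoded key bits (issue #951). A minimal standalone sketch of that idea, assuming a little-endian host and using hypothetical names rather than mimalloc's internals:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    // zero the canary byte that will be stored right after the user block
    static uint32_t mask_canary(uint32_t x) {
      return x & 0xFFFFFF00u;                         // little-endian: the low byte comes first in memory
    }

    int main(void) {
      uint32_t canary = mask_canary(0xA1B2C3D4u);     // stand-in for the encoded canary value
      unsigned char buf[8] = { 'A', 'B', 'C', 'D' };  // 4 user bytes, "padding" follows
      memcpy(buf + 4, &canary, sizeof(canary));
      puts((const char*)buf);                         // prints "ABCD": the first padding byte is 0x00
      return 0;
    }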