From 82c85d1a130eb1e204177169476785728888c0ae Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 5 Mar 2023 18:03:04 -0800 Subject: [PATCH 01/42] fix valgrind mem for large alignment --- src/alloc-aligned.c | 17 +++++++---------- test/test-wrong.c | 24 +++++++++++++++++++++++- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index c24f5c9d..08ad9814 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -74,20 +74,17 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust); // now zero the block if needed - if (zero && alignment > MI_ALIGNMENT_MAX) { - const ptrdiff_t diff = (uint8_t*)aligned_p - (uint8_t*)p; - ptrdiff_t zsize = mi_page_usable_block_size(_mi_ptr_page(p)) - diff - MI_PADDING_SIZE; - #if MI_PADDING - zsize -= MI_MAX_ALIGN_SIZE; - #endif - if (zsize > 0) { _mi_memzero(aligned_p, zsize); } + if (alignment > MI_ALIGNMENT_MAX) { + // for the tracker, on huge aligned allocations only from the start of the large block is defined + mi_track_mem_undefined(aligned_p, size); + if (zero) { + _mi_memzero(aligned_p, mi_usable_size(aligned_p)); + } } - #if MI_TRACK_ENABLED if (p != aligned_p) { mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p)); - } - #endif + } return aligned_p; } diff --git a/test/test-wrong.c b/test/test-wrong.c index 17d253b6..aaaf60b9 100644 --- a/test/test-wrong.c +++ b/test/test-wrong.c @@ -5,7 +5,10 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ -/* test file for valgrind support. +/* test file for valgrind/asan support. + + VALGRIND: + ---------- Compile in an "out/debug" folder: > cd out/debug @@ -19,6 +22,25 @@ terms of the MIT license. A copy of the license can be found in the file and test as: > valgrind ./test-wrong + + + ASAN + ---------- + Compile in an "out/debug" folder: + + > cd out/debug + > cmake ../.. 
-DMI_ASAN=1 + > make -j8 + + and then compile this file as: + + > clang -g -o test-wrong -I../../include ../../test/test-wrong.c libmimalloc-asan-debug.a -lpthread -fsanitize=address -fsanitize-recover=address + + and test as: + + > ASAN_OPTIONS=verbosity=1:halt_on_error=0 ./test-wrong + + */ #include #include From 465eb81d30187ad9139141d830fb1753ebb3742d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 5 Mar 2023 18:18:41 -0800 Subject: [PATCH 02/42] track free blocks in valgrind for heap_destroy as well --- include/mimalloc-track.h | 15 +++++++++------ src/heap.c | 11 +++++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/include/mimalloc-track.h b/include/mimalloc-track.h index 8326c620..b2404f8d 100644 --- a/include/mimalloc-track.h +++ b/include/mimalloc-track.h @@ -24,8 +24,9 @@ uses the block start pointer and original size (corresponding to the `mi_track_m #if MI_VALGRIND -#define MI_TRACK_ENABLED 1 -#define MI_TRACK_TOOL "valgrind" +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 1 // track free of individual blocks on heap_destroy +#define MI_TRACK_TOOL "valgrind" #include #include @@ -39,8 +40,9 @@ uses the block start pointer and original size (corresponding to the `mi_track_m #elif MI_ASAN -#define MI_TRACK_ENABLED 1 -#define MI_TRACK_TOOL "asan" +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_TOOL "asan" #include @@ -52,8 +54,9 @@ uses the block start pointer and original size (corresponding to the `mi_track_m #else -#define MI_TRACK_ENABLED 0 -#define MI_TRACK_TOOL "none" +#define MI_TRACK_ENABLED 0 +#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_TOOL "none" #define mi_track_malloc_size(p,reqsize,size,zero) #define mi_track_free_size(p,_size) diff --git a/src/heap.c b/src/heap.c index 0ed0ab2c..6fe1fb17 100644 --- a/src/heap.c +++ b/src/heap.c @@ -8,6 +8,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-atomic.h" +#include "mimalloc-track.h" #include // memset, memcpy @@ -310,6 +311,12 @@ void _mi_heap_destroy_pages(mi_heap_t* heap) { mi_heap_reset_pages(heap); } +static bool mi_cdecl mi_heap_track_block_free(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) { + MI_UNUSED(heap); MI_UNUSED(area); MI_UNUSED(arg); MI_UNUSED(block_size); + mi_track_free_size(block,mi_usable_size(block)); + return true; +} + void mi_heap_destroy(mi_heap_t* heap) { mi_assert(heap != NULL); mi_assert(mi_heap_is_initialized(heap)); @@ -321,6 +328,10 @@ void mi_heap_destroy(mi_heap_t* heap) { mi_heap_delete(heap); } else { + // track all blocks as freed + #if MI_TRACK_HEAP_DESTROY + mi_heap_visit_blocks(heap, true, mi_heap_track_block_free, NULL); + #endif // free all pages _mi_heap_destroy_pages(heap); mi_heap_free(heap); From b3f3a0de3b34ab0650228f4341c347f88c123420 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 5 Mar 2023 22:22:36 -0800 Subject: [PATCH 03/42] include psapi.h instead of defining PROCESS_MEMORY_COUNTERS on windows --- src/heap.c | 2 ++ src/stats.c | 14 +------------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/heap.c b/src/heap.c index 6fe1fb17..94fed2b5 100644 --- a/src/heap.c +++ b/src/heap.c @@ -311,11 +311,13 @@ void _mi_heap_destroy_pages(mi_heap_t* heap) { mi_heap_reset_pages(heap); } +#if MI_TRACK_HEAP_DESTROY static bool mi_cdecl mi_heap_track_block_free(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) { MI_UNUSED(heap); MI_UNUSED(area); MI_UNUSED(arg); MI_UNUSED(block_size); mi_track_free_size(block,mi_usable_size(block)); return true; } +#endif void mi_heap_destroy(mi_heap_t* heap) { mi_assert(heap != NULL); diff --git a/src/stats.c b/src/stats.c index 363c4400..84d677fa 100644 --- a/src/stats.c +++ b/src/stats.c @@ -465,6 +465,7 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { #if defined(_WIN32) #include +#include static mi_msecs_t filetime_msecs(const FILETIME* ftime) { ULARGE_INTEGER i; @@ -474,19 +475,6 @@ static mi_msecs_t filetime_msecs(const FILETIME* ftime) { return msecs; } -typedef struct _PROCESS_MEMORY_COUNTERS { - DWORD cb; - DWORD PageFaultCount; - SIZE_T PeakWorkingSetSize; - SIZE_T WorkingSetSize; - SIZE_T QuotaPeakPagedPoolUsage; - SIZE_T QuotaPagedPoolUsage; - SIZE_T QuotaPeakNonPagedPoolUsage; - SIZE_T QuotaNonPagedPoolUsage; - SIZE_T PagefileUsage; - SIZE_T PeakPagefileUsage; -} PROCESS_MEMORY_COUNTERS; -typedef PROCESS_MEMORY_COUNTERS* PPROCESS_MEMORY_COUNTERS; typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; From e912697d90fef95ff9004ee4a9543d333dbb128c Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 5 Mar 2023 22:26:05 -0800 Subject: [PATCH 04/42] fix warning with zero padding --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 70cb0d89..3205f975 100644 --- a/src/page.c +++ b/src/page.c @@ -856,7 +856,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme } else { // otherwise find a page with free blocks in our size segregated queues - mi_assert_internal(size >= MI_PADDING_SIZE); + mi_assert_internal((ptrdiff_t)size >= MI_PADDING_SIZE); // cast to signed to avoid error if there is no padding return mi_find_free_page(heap, size); } } From 
64fb009695a7e659059d6ea8c36bb23cee141193 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 5 Mar 2023 22:27:45 -0800 Subject: [PATCH 05/42] fix warning with zero padding --- src/page.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 3205f975..46dfd26f 100644 --- a/src/page.c +++ b/src/page.c @@ -856,7 +856,9 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme } else { // otherwise find a page with free blocks in our size segregated queues - mi_assert_internal((ptrdiff_t)size >= MI_PADDING_SIZE); // cast to signed to avoid error if there is no padding + #if MI_PADDING + mi_assert_internal(size >= MI_PADDING_SIZE); + #endif return mi_find_free_page(heap, size); } } From 2e6ab0f23033bfca96b51e15b9567b07417c9268 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 6 Mar 2023 09:02:38 -0800 Subject: [PATCH 06/42] add documentation for tracking tools; rename with prefix MI_TRACK_tool --- CMakeLists.txt | 47 ++++++++++++++++++--------------------- include/mimalloc-track.h | 44 +++++++++++++++++++++++++++--------- include/mimalloc-types.h | 7 +++--- include/mimalloc.h | 2 +- readme.md | 48 +++++++++++++++++++++++++++++++++------- test/test-api-fill.c | 4 ++-- test/test-wrong.c | 4 ++-- 7 files changed, 104 insertions(+), 52 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b6ed554..26f5eed8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,8 +10,8 @@ option(MI_PADDING "Enable padding to detect heap block overflow (alway option(MI_OVERRIDE "Override the standard malloc interface (e.g. define entry points for malloc() etc)" ON) option(MI_XMALLOC "Enable abort() call on memory allocation failure by default" OFF) option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF) -option(MI_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF) -option(MI_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF) +option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF) +option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF) option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON) @@ -25,7 +25,7 @@ option(MI_BUILD_TESTS "Build test executables" ON) option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF) option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF) option(MI_SKIP_COLLECT_ON_EXIT, "Skip collecting memory on program exit" OFF) -option(MI_NO_PADDING "Force no use of padding even in DEBUG mode ets." OFF) +option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc." 
OFF) # deprecated options option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) @@ -125,42 +125,39 @@ endif() if(MI_SECURE) message(STATUS "Set full secure build (MI_SECURE=ON)") - list(APPEND mi_defines MI_SECURE=4) - #if (MI_VALGRIND) - # message(WARNING "Secure mode is a bit weakened when compiling with Valgrind support as buffer overflow detection is no longer byte-precise (if running without valgrind)") - #endif() + list(APPEND mi_defines MI_SECURE=4) endif() -if(MI_VALGRIND) +if(MI_TRACK_VALGRIND) CHECK_INCLUDE_FILES("valgrind/valgrind.h;valgrind/memcheck.h" MI_HAS_VALGRINDH) if (NOT MI_HAS_VALGRINDH) - set(MI_VALGRIND OFF) + set(MI_TRACK_VALGRIND OFF) message(WARNING "Cannot find the 'valgrind/valgrind.h' and 'valgrind/memcheck.h' -- install valgrind first") - message(STATUS "Compile **without** Valgrind support (MI_VALGRIND=OFF)") + message(STATUS "Compile **without** Valgrind support (MI_TRACK_VALGRIND=OFF)") else() - message(STATUS "Compile with Valgrind support (MI_VALGRIND=ON)") - list(APPEND mi_defines MI_VALGRIND=1) + message(STATUS "Compile with Valgrind support (MI_TRACK_VALGRIND=ON)") + list(APPEND mi_defines MI_TRACK_VALGRIND=1) endif() endif() -if(MI_ASAN) +if(MI_TRACK_ASAN) if (APPLE AND MI_OVERRIDE) - set(MI_ASAN OFF) - message(WARNING "Cannot enable address sanitizer support on macOS if MI_OVERRIDE is ON (MI_ASAN=OFF)") + set(MI_TRACK_ASAN OFF) + message(WARNING "Cannot enable address sanitizer support on macOS if MI_OVERRIDE is ON (MI_TRACK_ASAN=OFF)") endif() - if (MI_VALGRIND) - set(MI_ASAN OFF) - message(WARNING "Cannot enable address sanitizer support with also Valgrind support enabled (MI_ASAN=OFF)") + if (MI_TRACK_VALGRIND) + set(MI_TRACK_ASAN OFF) + message(WARNING "Cannot enable address sanitizer support with also Valgrind support enabled (MI_TRACK_ASAN=OFF)") endif() - if(MI_ASAN) + if(MI_TRACK_ASAN) CHECK_INCLUDE_FILES("sanitizer/asan_interface.h" MI_HAS_ASANH) if (NOT MI_HAS_ASANH) - set(MI_ASAN OFF) + set(MI_TRACK_ASAN OFF) message(WARNING "Cannot find the 'sanitizer/asan_interface.h' -- install address sanitizer support first") - message(STATUS "Compile **without** address sanitizer support (MI_ASAN=OFF)") + message(STATUS "Compile **without** address sanitizer support (MI_TRACK_ASAN=OFF)") else() - message(STATUS "Compile with address sanitizer support (MI_ASAN=ON)") - list(APPEND mi_defines MI_ASAN=1) + message(STATUS "Compile with address sanitizer support (MI_TRACK_ASAN=ON)") + list(APPEND mi_defines MI_TRACK_ASAN=1) list(APPEND mi_cflags -fsanitize=address) list(APPEND mi_libraries -fsanitize=address) endif() @@ -327,10 +324,10 @@ set(mi_basename "mimalloc") if(MI_SECURE) set(mi_basename "${mi_basename}-secure") endif() -if(MI_VALGRIND) +if(MI_TRACK_VALGRIND) set(mi_basename "${mi_basename}-valgrind") endif() -if(MI_ASAN) +if(MI_TRACK_ASAN) set(mi_basename "${mi_basename}-asan") endif() string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC) diff --git a/include/mimalloc-track.h b/include/mimalloc-track.h index b2404f8d..272ca1b8 100644 --- a/include/mimalloc-track.h +++ b/include/mimalloc-track.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. 
A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -9,20 +9,39 @@ terms of the MIT license. A copy of the license can be found in the file #define MIMALLOC_TRACK_H /* ------------------------------------------------------------------------------------------------------ -Track memory ranges with macros for tools like Valgrind -address sanitizer, or other memory checkers. +Track memory ranges with macros for tools like Valgrind address sanitizer, or other memory checkers. +These can be defined for tracking allocation: + + #define mi_track_malloc_size(p,reqsize,size,zero) + #define mi_track_free_size(p,_size) The macros are set up such that the size passed to `mi_track_free_size` -matches the size of the allocation, or the new size of a `mi_track_resize` (currently unused though). +always matches the size of `mi_track_malloc_size`. (currently, `size == mi_usable_size(p)`). +The `reqsize` is what the user requested, and `size >= reqsize`. +The `size` is either byte precise (and `size==reqsize`) if `MI_PADDING` is enabled, +or otherwise it is the usable block size which may be larger than the original request. +Use `_mi_block_size_of(void* p)` to get the full block size that was allocated (including padding etc). +The `zero` parameter is `true` if the allocated block is zero initialized. + +Optional: + + #define mi_track_align(p,alignedp,offset,size) + #define mi_track_resize(p,oldsize,newsize) + +The `mi_track_align` is called right after a `mi_track_malloc` for aligned pointers in a block. +The corresponding `mi_track_free` still uses the block start pointer and original size (corresponding to the `mi_track_malloc`). +The `mi_track_resize` is currently unused but could be called on reallocations within a block. + +The following macros are for tools like asan and valgrind to track whether memory is +defined, undefined, or not accessible at all: + + #define mi_track_mem_defined(p,size) + #define mi_track_mem_undefined(p,size) + #define mi_track_mem_noaccess(p,size) -The `size` is either byte precise (and what the user requested) if `MI_PADDING` is enabled, -or otherwise it is the full block size which may be larger than the original request. -Aligned pointers in a block are signaled right after a `mi_track_malloc` -with the `mi_track_align` macro. The corresponding `mi_track_free` still -uses the block start pointer and original size (corresponding to the `mi_track_malloc`). -------------------------------------------------------------------------------------------------------*/ -#if MI_VALGRIND +#if MI_TRACK_VALGRIND #define MI_TRACK_ENABLED 1 #define MI_TRACK_HEAP_DESTROY 1 // track free of individual blocks on heap_destroy @@ -38,7 +57,7 @@ uses the block start pointer and original size (corresponding to the `mi_track_m #define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size) #define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size) -#elif MI_ASAN +#elif MI_TRACK_ASAN #define MI_TRACK_ENABLED 1 #define MI_TRACK_HEAP_DESTROY 0 @@ -68,6 +87,9 @@ uses the block start pointer and original size (corresponding to the `mi_track_m #endif +// ------------------- +// Utility definitions + #ifndef mi_track_resize #define mi_track_resize(p,oldsize,newsize) mi_track_free_size(p,oldsize); mi_track_malloc(p,newsize,false) #endif diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index e365a8f5..9b3f5972 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,8 +29,9 @@ terms of the MIT license. 
A copy of the license can be found in the file // Define NDEBUG in the release version to disable assertions. // #define NDEBUG -// Define MI_VALGRIND to enable valgrind support -// #define MI_VALGRIND 1 +// Define MI_TRACK_ to enable tracking support +// #define MI_TRACK_VALGRIND 1 +// #define MI_TRACK_ASAN 1 // Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). // #define MI_STAT 1 @@ -59,7 +60,7 @@ terms of the MIT license. A copy of the license can be found in the file // Reserve extra padding at the end of each block to be more resilient against heap block overflows. // The padding can detect buffer overflow on free. -#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || MI_VALGRIND || MI_ASAN) +#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || MI_TRACK_VALGRIND || MI_TRACK_ASAN) #define MI_PADDING 1 #endif diff --git a/include/mimalloc.h b/include/mimalloc.h index c13bda23..27f6b331 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2022, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. diff --git a/readme.md b/readme.md index 364b974b..b102a50f 100644 --- a/readme.md +++ b/readme.md @@ -78,7 +78,7 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. -* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with asan and improved [Valgrind] support. Support abitrary large +* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [#asan] and improved [#Valgrind] support. Support abitrary large alignments (in particular for `std::pmr` pools). Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev). Heap walks now visit all object (including huge objects). Support Windows nano server containers (by Johannes Schindelin,@dscho). @@ -347,16 +347,19 @@ When _mimalloc_ is built using debug mode, various checks are done at runtime to - Double free's, and freeing invalid heap pointers are detected. - Corrupted free-lists and some forms of use-after-free are detected. -## Valgrind +## Tools -Generally, we recommend using the standard allocator with the amazing [Valgrind] tool (and -also for other address sanitizers). -However, it is possible to build mimalloc with Valgrind support. This has a small performance -overhead but does allow detecting memory leaks and byte-precise buffer overflows directly on final -executables. To build with valgrind support, use the `MI_VALGRIND=ON` cmake option: +Generally, we recommend using the standard allocator with memory tracking tools, but mimalloc +can also be build to support the [address sanitizer][asan] or the excellent [Valgrind] tool. +This has a small performance overhead but does allow detecting memory leaks and byte-precise +buffer overflows directly on final executables. See also the `test/test-wrong.c` file to test with various tools. + +### Valgrind + +To build with valgrind support, use the `MI_TRACK_VALGRIND=ON` cmake option: ``` -> cmake ../.. 
-DMI_VALGRIND=ON +> cmake ../.. -DMI_TRACK_VALGRIND=ON ``` This can also be combined with secure mode or debug mode. @@ -385,6 +388,35 @@ Valgrind support is in its initial development -- please report any issues. [Valgrind]: https://valgrind.org/ [valgrind-soname]: https://valgrind.org/docs/manual/manual-core.html#opt.soname-synonyms +### ASAN + +To build with the address sanitizer, use the `-DMI_TRACK_ASAN=ON` cmake option: + +``` +> cmake ../.. -DMI_TRACK_ASAN=ON +``` + +This can also be combined with secure mode or debug mode. +You can then run your programs as:' + +``` +> ASAN_OPTIONS=verbosity=1 +``` + +When you link a program with an address sanitizer build of mimalloc, you should +generally compile that program too with the address sanitizer enabled. +For example, assuming you build mimalloc in `out/debug`: + +``` +clang -g -o test-wrong -Iinclude test/test-wrong.c out/debug/libmimalloc-asan-debug.a -lpthread -fsanitize=address -fsanitize-recover=address +``` + +Since the address sanitizer redirects the standard allocation functions, on some platforms (macOSX for example) +it is required to compile mimalloc with `-DMI_OVERRIDE=OFF`. +Adress sanitizer support is in its initial development -- please report any issues. + +[asan]: https://github.com/google/sanitizers/wiki/AddressSanitizer + # Overriding Standard Malloc diff --git a/test/test-api-fill.c b/test/test-api-fill.c index 85d8524f..a32dfa27 100644 --- a/test/test-api-fill.c +++ b/test/test-api-fill.c @@ -309,7 +309,7 @@ bool check_zero_init(uint8_t* p, size_t size) { #if MI_DEBUG >= 2 bool check_debug_fill_uninit(uint8_t* p, size_t size) { -#if MI_VALGRIND +#if MI_TRACK_VALGRIND (void)p; (void)size; return true; // when compiled with valgrind we don't init on purpose #else @@ -325,7 +325,7 @@ bool check_debug_fill_uninit(uint8_t* p, size_t size) { } bool check_debug_fill_freed(uint8_t* p, size_t size) { -#if MI_VALGRIND +#if MI_TRACK_VALGRIND (void)p; (void)size; return true; // when compiled with valgrind we don't fill on purpose #else diff --git a/test/test-wrong.c b/test/test-wrong.c index aaaf60b9..56a2339a 100644 --- a/test/test-wrong.c +++ b/test/test-wrong.c @@ -12,7 +12,7 @@ terms of the MIT license. A copy of the license can be found in the file Compile in an "out/debug" folder: > cd out/debug - > cmake ../.. -DMI_VALGRIND=1 + > cmake ../.. -DMI_TRACK_VALGRIND=1 > make -j8 and then compile this file as: @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file Compile in an "out/debug" folder: > cd out/debug - > cmake ../.. -DMI_ASAN=1 + > cmake ../.. 
-DMI_TRACK_ASAN=1 > make -j8 and then compile this file as: From 08a01d26dc079756c8e94409fba051fd2eb5bd2c Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 14 Mar 2023 16:54:46 -0700 Subject: [PATCH 07/42] initial commit of new primitive layer --- CMakeLists.txt | 3 +- ide/vs2022/mimalloc-override.vcxproj | 1 + ide/vs2022/mimalloc.vcxproj | 1 + include/mimalloc-internal.h | 4 +- src/os.c | 1085 +++----------------------- src/prim/prim-unix.c | 483 ++++++++++++ src/prim/prim-wasi.c | 154 ++++ src/prim/prim-windows.c | 385 +++++++++ src/prim/prim.c | 18 + src/prim/prim.h | 64 ++ src/prim/readme.md | 6 + 11 files changed, 1221 insertions(+), 983 deletions(-) create mode 100644 src/prim/prim-unix.c create mode 100644 src/prim/prim-wasi.c create mode 100644 src/prim/prim-windows.c create mode 100644 src/prim/prim.c create mode 100644 src/prim/prim.h create mode 100644 src/prim/readme.md diff --git a/CMakeLists.txt b/CMakeLists.txt index 26f5eed8..28dfe830 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,8 @@ set(mi_sources src/alloc-posix.c src/heap.c src/options.c - src/init.c) + src/init.c + src/prim/prim.c) set(mi_cflags "") set(mi_libraries "") diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index e7133af4..a1d25d28 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -237,6 +237,7 @@ + diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 9081881c..335125c1 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -225,6 +225,7 @@ + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index b3cdf716..ee26bfb8 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -86,7 +86,9 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats); void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); - +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); +bool _mi_os_use_large_page(size_t size, size_t alignment); +size_t _mi_os_large_page_size(void); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* id, mi_os_tld_t* tld); diff --git a/src/os.c b/src/os.c index 5277e5e4..8ef72e04 100644 --- a/src/os.c +++ b/src/os.c @@ -1,72 +1,69 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ -#ifndef _DEFAULT_SOURCE -#define _DEFAULT_SOURCE // ensure mmap flags are defined -#endif - -#if defined(__sun) -// illumos provides new mman.h api when any of these are defined -// otherwise the old api based on caddr_t which predates the void pointers one. -// stock solaris provides only the former, chose to atomically to discard those -// flags only here rather than project wide tough. 
-#undef _XOPEN_SOURCE -#undef _POSIX_C_SOURCE -#endif #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-atomic.h" +#include "prim/prim.h" -#include // strerror - -#ifdef _MSC_VER -#pragma warning(disable:4996) // strerror -#endif - -#if defined(__wasi__) -#define MI_USE_SBRK -#endif - -#if defined(_WIN32) -#include -#elif defined(__wasi__) -#include // sbrk -#else -#include // mmap -#include // sysconf -#if defined(__linux__) -#include -#include -#if defined(__GLIBC__) -#include // linux mmap flags -#else -#include -#endif -#endif -#if defined(__APPLE__) -#include -#if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR -#include -#endif -#endif -#if defined(__FreeBSD__) || defined(__DragonFly__) -#include -#if __FreeBSD_version >= 1200000 -#include -#include -#endif -#include -#endif -#endif /* ----------------------------------------------------------- Initialization. On windows initializes support for aligned allocation and large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). ----------------------------------------------------------- */ + +static mi_os_mem_config_t mi_os_mem_config = { + 4096, // page size + 0, // large page size (usually 2MiB) + 4096, // allocation granularity + true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems) + false // must free whole? +}; + +bool _mi_os_has_overcommit(void) { + return mi_os_mem_config.has_overcommit; +} + +// OS (small) page size +size_t _mi_os_page_size(void) { + return mi_os_mem_config.page_size; +} + +// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB) +size_t _mi_os_large_page_size(void) { + return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size()); +} + +bool _mi_os_use_large_page(size_t size, size_t alignment) { + // if we have access, check the size and alignment requirements + if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_large_os_pages)) return false; + return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0); +} + +// round to a good OS allocation size (bounded by max 12.5% waste) +size_t _mi_os_good_alloc_size(size_t size) { + size_t align_size; + if (size < 512*MI_KiB) align_size = _mi_os_page_size(); + else if (size < 2*MI_MiB) align_size = 64*MI_KiB; + else if (size < 8*MI_MiB) align_size = 256*MI_KiB; + else if (size < 32*MI_MiB) align_size = 1*MI_MiB; + else align_size = 4*MI_MiB; + if mi_unlikely(size >= (SIZE_MAX - align_size)) return size; // possible overflow? + return _mi_align_up(size, align_size); +} + +void _mi_os_init(void) { + _mi_prim_mem_init(&mi_os_mem_config); +} + + +/* ----------------------------------------------------------- + Util +-------------------------------------------------------------- */ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); @@ -90,226 +87,6 @@ static void* mi_align_down_ptr(void* p, size_t alignment) { } -// page size (initialized properly in `os_init`) -static size_t os_page_size = 4096; - -// minimal allocation granularity -static size_t os_alloc_granularity = 4096; - -// if non-zero, use large page allocation -static size_t large_os_page_size = 0; - -// is memory overcommit allowed? 
-// set dynamically in _mi_os_init (and if true we use MAP_NORESERVE) -static bool os_overcommit = true; - -bool _mi_os_has_overcommit(void) { - return os_overcommit; -} - -// OS (small) page size -size_t _mi_os_page_size(void) { - return os_page_size; -} - -// if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB) -size_t _mi_os_large_page_size(void) { - return (large_os_page_size != 0 ? large_os_page_size : _mi_os_page_size()); -} - -#if !defined(MI_USE_SBRK) && !defined(__wasi__) -static bool use_large_os_page(size_t size, size_t alignment) { - // if we have access, check the size and alignment requirements - if (large_os_page_size == 0 || !mi_option_is_enabled(mi_option_large_os_pages)) return false; - return ((size % large_os_page_size) == 0 && (alignment % large_os_page_size) == 0); -} -#endif - -// round to a good OS allocation size (bounded by max 12.5% waste) -size_t _mi_os_good_alloc_size(size_t size) { - size_t align_size; - if (size < 512*MI_KiB) align_size = _mi_os_page_size(); - else if (size < 2*MI_MiB) align_size = 64*MI_KiB; - else if (size < 8*MI_MiB) align_size = 256*MI_KiB; - else if (size < 32*MI_MiB) align_size = 1*MI_MiB; - else align_size = 4*MI_MiB; - if mi_unlikely(size >= (SIZE_MAX - align_size)) return size; // possible overflow? - return _mi_align_up(size, align_size); -} - -#if defined(_WIN32) -// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. -// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) -// NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) -// We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's. 
-typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { - MiMemExtendedParameterInvalidType = 0, - MiMemExtendedParameterAddressRequirements, - MiMemExtendedParameterNumaNode, - MiMemExtendedParameterPartitionHandle, - MiMemExtendedParameterUserPhysicalHandle, - MiMemExtendedParameterAttributeFlags, - MiMemExtendedParameterMax -} MI_MEM_EXTENDED_PARAMETER_TYPE; - -typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { - struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; - union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg; -} MI_MEM_EXTENDED_PARAMETER; - -typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { - PVOID LowestStartingAddress; - PVOID HighestEndingAddress; - SIZE_T Alignment; -} MI_MEM_ADDRESS_REQUIREMENTS; - -#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 - -#include -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); -static PVirtualAlloc2 pVirtualAlloc2 = NULL; -static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; - -// Similarly, GetNumaProcesorNodeEx is only supported since Windows 7 -typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; - -typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); -typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); -typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); -typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); -static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; -static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; -static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; -static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; - -static bool mi_win_enable_large_os_pages(void) -{ - if (large_os_page_size > 0) return true; - - // Try to see if large OS pages are supported - // To use large pages on Windows, we first need access permission - // Set "Lock pages in memory" permission in the group policy editor - // - unsigned long err = 0; - HANDLE token = NULL; - BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); - if (ok) { - TOKEN_PRIVILEGES tp; - ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); - if (ok) { - tp.PrivilegeCount = 1; - tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); - if (ok) { - err = GetLastError(); - ok = (err == ERROR_SUCCESS); - if (ok) { - large_os_page_size = GetLargePageMinimum(); - } - } - } - CloseHandle(token); - } - if (!ok) { - if (err == 0) err = GetLastError(); - _mi_warning_message("cannot enable large OS page support, error %lu\n", err); - } - return (ok!=0); -} - -void _mi_os_init(void) -{ - os_overcommit = false; - // get the page size - SYSTEM_INFO si; - GetSystemInfo(&si); - if (si.dwPageSize > 0) os_page_size = si.dwPageSize; - if (si.dwAllocationGranularity > 0) os_alloc_granularity = si.dwAllocationGranularity; - // get the VirtualAlloc2 function - HINSTANCE hDll; - hDll = LoadLibrary(TEXT("kernelbase.dll")); - if (hDll != NULL) { - // use VirtualAlloc2FromApp if possible as it is available to Windows store apps - 
pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); - FreeLibrary(hDll); - } - // NtAllocateVirtualMemoryEx is used for huge page allocation - hDll = LoadLibrary(TEXT("ntdll.dll")); - if (hDll != NULL) { - pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); - FreeLibrary(hDll); - } - // Try to use Win7+ numa API - hDll = LoadLibrary(TEXT("kernel32.dll")); - if (hDll != NULL) { - pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); - pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); - pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); - pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); - FreeLibrary(hDll); - } - if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - mi_win_enable_large_os_pages(); - } -} -#elif defined(__wasi__) -void _mi_os_init(void) { - os_overcommit = false; - os_page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB - os_alloc_granularity = 16; -} - -#else // generic unix - -static void os_detect_overcommit(void) { -#if defined(__linux__) - int fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); - if (fd < 0) return; - char buf[32]; - ssize_t nread = read(fd, &buf, sizeof(buf)); - close(fd); - // - // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE) - if (nread >= 1) { - os_overcommit = (buf[0] == '0' || buf[0] == '1'); - } -#elif defined(__FreeBSD__) - int val = 0; - size_t olen = sizeof(val); - if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { - os_overcommit = (val != 0); - } -#else - // default: overcommit is true -#endif -} - -void _mi_os_init(void) { - // get the page size - long result = sysconf(_SC_PAGESIZE); - if (result > 0) { - os_page_size = (size_t)result; - os_alloc_granularity = os_page_size; - } - large_os_page_size = 2*MI_MiB; // TODO: can we query the OS for this? 
- os_detect_overcommit(); -} -#endif - - -#if defined(MADV_NORMAL) -static int mi_madvise(void* addr, size_t length, int advice) { - #if defined(__sun) - return madvise((caddr_t)addr, length, advice); // Solaris needs cast (issue #520) - #else - return madvise(addr, length, advice); - #endif -} -#endif - - /* ----------------------------------------------------------- aligned hinting -------------------------------------------------------------- */ @@ -330,7 +107,7 @@ static mi_decl_cache_align _Atomic(uintptr_t)aligned_base; #define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes) #define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages) -static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL; size = _mi_align_up(size, MI_SEGMENT_SIZE); @@ -354,354 +131,32 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) return (void*)hint; } #else -static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { MI_UNUSED(try_alignment); MI_UNUSED(size); return NULL; } #endif + /* ----------------------------------------------------------- Free memory -------------------------------------------------------------- */ -static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats) +void _mi_os_free_ex(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) { - if (addr == NULL || size == 0) return true; // || _mi_os_is_huge_reserved(addr) - bool err = false; -#if defined(_WIN32) - DWORD errcode = 0; - err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); - if (err) { errcode = GetLastError(); } - if (errcode == ERROR_INVALID_ADDRESS) { - // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside - // the memory region returned by VirtualAlloc; in that case we need to free using - // the start of the region. 
- MEMORY_BASIC_INFORMATION info = { 0 }; - VirtualQuery(addr, &info, sizeof(info)); - if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) { - errcode = 0; - err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0); - if (err) { errcode = GetLastError(); } - } - } - if (errcode != 0) { - _mi_warning_message("unable to release OS memory: error code 0x%x, addr: %p, size: %zu\n", errcode, addr, size); - } -#elif defined(MI_USE_SBRK) || defined(__wasi__) - err = false; // sbrk heap cannot be shrunk -#else - err = (munmap(addr, size) == -1); - if (err) { - _mi_warning_message("unable to release OS memory: %s, addr: %p, size: %zu\n", strerror(errno), addr, size); - } -#endif + MI_UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; + if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) + const size_t csize = _mi_os_good_alloc_size(size); + _mi_prim_free(addr, csize); if (was_committed) { _mi_stat_decrease(&stats->committed, size); } _mi_stat_decrease(&stats->reserved, size); - return !err; } - -/* ----------------------------------------------------------- - Raw allocation on Windows (VirtualAlloc) --------------------------------------------------------------- */ - -#ifdef _WIN32 -static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { -#if (MI_INTPTR_SIZE >= 8) - // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations - if (addr == NULL) { - void* hint = mi_os_get_aligned_hint(try_alignment,size); - if (hint != NULL) { - void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE); - if (p != NULL) return p; - _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags); - // fall through on error - } - } -#endif - // on modern Windows try use VirtualAlloc2 for aligned allocation - if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { - MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; - reqs.Alignment = try_alignment; - MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; - param.Type.Type = MiMemExtendedParameterAddressRequirements; - param.Arg.Pointer = &reqs; - void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); - if (p != NULL) return p; - _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); - // fall through on error - } - // last resort - return VirtualAlloc(addr, size, flags, PAGE_READWRITE); +void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) { + _mi_os_free_ex(p, size, true, tld_stats); } -static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { - mi_assert_internal(!(large_only && !allow_large)); - static _Atomic(size_t) large_page_try_ok; // = 0; - void* p = NULL; - // Try to allocate large OS pages (2MiB) if allowed or required. - if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { - size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); - if (!large_only && try_ok > 0) { - // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. 
- // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. - mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); - } - else { - // large OS pages must always reserve and commit. - *is_large = true; - p = mi_win_virtual_allocx(addr, size, try_alignment, flags | MEM_LARGE_PAGES); - if (large_only) return p; - // fall back to non-large page allocation on error (`p == NULL`). - if (p == NULL) { - mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations - } - } - } - // Fall back to regular page allocation - if (p == NULL) { - *is_large = ((flags&MEM_LARGE_PAGES) != 0); - p = mi_win_virtual_allocx(addr, size, try_alignment, flags); - } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); - } - return p; -} - -/* ----------------------------------------------------------- - Raw allocation using `sbrk` or `wasm_memory_grow` --------------------------------------------------------------- */ - -#elif defined(MI_USE_SBRK) || defined(__wasi__) -#if defined(MI_USE_SBRK) - static void* mi_memory_grow( size_t size ) { - void* p = sbrk(size); - if (p == (void*)(-1)) return NULL; - #if !defined(__wasi__) // on wasi this is always zero initialized already (?) - memset(p,0,size); - #endif - return p; - } -#elif defined(__wasi__) - static void* mi_memory_grow( size_t size ) { - size_t base = (size > 0 ? __builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size())) - : __builtin_wasm_memory_size(0)); - if (base == SIZE_MAX) return NULL; - return (void*)(base * _mi_os_page_size()); - } -#endif - -#if defined(MI_USE_PTHREADS) -static pthread_mutex_t mi_heap_grow_mutex = PTHREAD_MUTEX_INITIALIZER; -#endif - -static void* mi_heap_grow(size_t size, size_t try_alignment) { - void* p = NULL; - if (try_alignment <= 1) { - // `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now) - #if defined(MI_USE_PTHREADS) - pthread_mutex_lock(&mi_heap_grow_mutex); - #endif - p = mi_memory_grow(size); - #if defined(MI_USE_PTHREADS) - pthread_mutex_unlock(&mi_heap_grow_mutex); - #endif - } - else { - void* base = NULL; - size_t alloc_size = 0; - // to allocate aligned use a lock to try to avoid thread interaction - // between getting the current size and actual allocation - // (also, `sbrk` is not thread safe in general) - #if defined(MI_USE_PTHREADS) - pthread_mutex_lock(&mi_heap_grow_mutex); - #endif - { - void* current = mi_memory_grow(0); // get current size - if (current != NULL) { - void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space - alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size()); - base = mi_memory_grow(alloc_size); - } - } - #if defined(MI_USE_PTHREADS) - pthread_mutex_unlock(&mi_heap_grow_mutex); - #endif - if (base != NULL) { - p = mi_align_up_ptr(base, try_alignment); - if ((uint8_t*)p + size > (uint8_t*)base + alloc_size) { - // another thread used wasm_memory_grow/sbrk in-between and we do not have enough - // space after alignment. 
Give up (and waste the space as we cannot shrink :-( ) - // (in `mi_os_mem_alloc_aligned` this will fall back to overallocation to align) - p = NULL; - } - } - } - if (p == NULL) { - _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); - errno = ENOMEM; - return NULL; - } - mi_assert_internal( try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); - return p; -} - -/* ----------------------------------------------------------- - Raw allocation on Unix's (mmap) --------------------------------------------------------------- */ -#else -#define MI_OS_USE_MMAP -static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { - MI_UNUSED(try_alignment); - #if defined(MAP_ALIGNED) // BSD - if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - size_t n = mi_bsr(try_alignment); - if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB - flags |= MAP_ALIGNED(n); - void* p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); - if (p!=MAP_FAILED) return p; - // fall back to regular mmap - } - } - #elif defined(MAP_ALIGN) // Solaris - if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - void* p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment - if (p!=MAP_FAILED) return p; - // fall back to regular mmap - } - #endif - #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) - // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations - if (addr == NULL) { - void* hint = mi_os_get_aligned_hint(try_alignment, size); - if (hint != NULL) { - void* p = mmap(hint, size, protect_flags, flags, fd, 0); - if (p!=MAP_FAILED) return p; - // fall back to regular mmap - } - } - #endif - // regular mmap - void* p = mmap(addr, size, protect_flags, flags, fd, 0); - if (p!=MAP_FAILED) return p; - // failed to allocate - return NULL; -} - -static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { - void* p = NULL; - #if !defined(MAP_ANONYMOUS) - #define MAP_ANONYMOUS MAP_ANON - #endif - #if !defined(MAP_NORESERVE) - #define MAP_NORESERVE 0 - #endif - int flags = MAP_PRIVATE | MAP_ANONYMOUS; - int fd = -1; - if (_mi_os_has_overcommit()) { - flags |= MAP_NORESERVE; - } - #if defined(PROT_MAX) - protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - #if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) - int os_tag = (int)mi_option_get(mi_option_os_tag); - if (os_tag < 100 || os_tag > 255) { os_tag = 100; } - fd = VM_MAKE_TAG(os_tag); - #endif - // huge page allocation - if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { - static _Atomic(size_t) large_page_try_ok; // = 0; - size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); - if (!large_only && try_ok > 0) { - // If the OS is not configured for large OS pages, or the user does not have - // enough permission, the `mmap` will always fail (but it might also fail for other reasons). - // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times - // to avoid too many failing calls to mmap. 
- mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); - } - else { - int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux - int lfd = fd; - #ifdef MAP_ALIGNED_SUPER - lflags |= MAP_ALIGNED_SUPER; - #endif - #ifdef MAP_HUGETLB - lflags |= MAP_HUGETLB; - #endif - #ifdef MAP_HUGE_1GB - static bool mi_huge_pages_available = true; - if ((size % MI_GiB) == 0 && mi_huge_pages_available) { - lflags |= MAP_HUGE_1GB; - } - else - #endif - { - #ifdef MAP_HUGE_2MB - lflags |= MAP_HUGE_2MB; - #endif - } - #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB - lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; - #endif - if (large_only || lflags != flags) { - // try large OS page allocation - *is_large = true; - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - #ifdef MAP_HUGE_1GB - if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { - mi_huge_pages_available = false; // don't try huge 1GiB pages again - _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); - lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - } - #endif - if (large_only) return p; - if (p == NULL) { - mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations - } - } - } - } - // regular allocation - if (p == NULL) { - *is_large = false; - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); - if (p != NULL) { - #if defined(MADV_HUGEPAGE) - // Many Linux systems don't allow MAP_HUGETLB but they support instead - // transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGE - // though since properly aligned allocations will already use large pages if available - // in that case -- in particular for our large regions (in `memory.c`). - // However, some systems only allow THP if called with explicit `madvise`, so - // when large OS pages are enabled for mimalloc, we call `madvise` anyways. - if (allow_large && use_large_os_page(size, try_alignment)) { - if (mi_madvise(p, size, MADV_HUGEPAGE) == 0) { - *is_large = true; // possibly - }; - } - #elif defined(__sun) - if (allow_large && use_large_os_page(size, try_alignment)) { - struct memcntl_mha cmd = {0}; - cmd.mha_pagesize = large_os_page_size; - cmd.mha_cmd = MHA_MAPSIZE_VA; - if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { - *is_large = true; - } - } - #endif - } - } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large); - } - return p; -} -#endif - /* ----------------------------------------------------------- Primitive allocation from the OS. 
@@ -714,7 +169,7 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo if (!commit) allow_large = false; if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning - void* p = NULL; + void* p = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large); /* if (commit && allow_large) { p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); @@ -725,18 +180,6 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo } */ - #if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) { flags |= MEM_COMMIT; } - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); - #elif defined(MI_USE_SBRK) || defined(__wasi__) - MI_UNUSED(allow_large); - *is_large = false; - p = mi_heap_grow(size, try_alignment); - #else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); - #endif mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); @@ -762,40 +205,41 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // if not aligned, free it, overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { - mi_os_mem_free(p, size, commit, stats); + _mi_os_free_ex(p, size, commit, stats); _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (%zu bytes, address: %p, alignment: %zu, commit: %d)\n", size, p, alignment, commit); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; -#if _WIN32 - // over-allocate uncommitted (virtual) memory - p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats); - if (p == NULL) return NULL; + if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocate block + // over-allocate uncommitted (virtual) memory + p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats); + if (p == NULL) return NULL; - // set p to the aligned part in the full region - // note: this is dangerous on Windows as VirtualFree needs the actual region pointer - // but in mi_os_mem_free we handle this (hopefully exceptional) situation. - p = mi_align_up_ptr(p, alignment); + // set p to the aligned part in the full region + // note: this is dangerous on Windows as VirtualFree needs the actual region pointer + // but in mi_os_mem_free we handle this (hopefully exceptional) situation. + p = mi_align_up_ptr(p, alignment); - // explicitly commit only the aligned part - if (commit) { - _mi_os_commit(p, size, NULL, stats); + // explicitly commit only the aligned part + if (commit) { + _mi_os_commit(p, size, NULL, stats); + } + } + else { // mmap can free inside an allocation + // overallocate... + p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, stats); + if (p == NULL) return NULL; + // and selectively unmap parts around the over-allocated area. 
(noop on sbrk) + void* aligned_p = mi_align_up_ptr(p, alignment); + size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; + size_t mid_size = _mi_align_up(size, _mi_os_page_size()); + size_t post_size = over_size - pre_size - mid_size; + mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size); + if (pre_size > 0) _mi_os_free_ex(p, pre_size, commit, stats); + if (post_size > 0) _mi_os_free_ex((uint8_t*)aligned_p + mid_size, post_size, commit, stats); + // we can return the aligned pointer on `mmap` (and sbrk) systems + p = aligned_p; } -#else - // overallocate... - p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, stats); - if (p == NULL) return NULL; - // and selectively unmap parts around the over-allocated area. (noop on sbrk) - void* aligned_p = mi_align_up_ptr(p, alignment); - size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; - size_t mid_size = _mi_align_up(size, _mi_os_page_size()); - size_t post_size = over_size - pre_size - mid_size; - mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size); - if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); - if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); - // we can return the aligned pointer on `mmap` (and sbrk) systems - p = aligned_p; -#endif } mi_assert_internal(p == NULL || (p != NULL && ((uintptr_t)p % alignment) == 0)); @@ -804,7 +248,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, /* ----------------------------------------------------------- - OS API: alloc, free, alloc_aligned + OS API: alloc and alloc_aligned ----------------------------------------------------------- */ void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) { @@ -816,21 +260,9 @@ void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) { return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); } -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - if (size == 0 || p == NULL) return; - size = _mi_os_good_alloc_size(size); - mi_os_mem_free(p, size, was_committed, stats); -} - -void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { - _mi_os_free_ex(p, size, true, stats); -} - void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats) { - MI_UNUSED(&mi_os_get_aligned_hint); // suppress unused warnings + MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings MI_UNUSED(tld_stats); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); @@ -883,11 +315,11 @@ void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_of _mi_os_free_ex(start, size + extra, was_committed, tld_stats); } + /* ----------------------------------------------------------- OS memory API: reset, commit, decommit, protect, unprotect. 
----------------------------------------------------------- */ - // OS page align within a given area, either conservative (pages inside the area only), // or not (straddling pages outside the area is possible) static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) { @@ -912,18 +344,6 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } -static void mi_mprotect_hint(int err) { -#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page - if (err == ENOMEM) { - _mi_warning_message("the previous warning may have been caused by a low memory map limit.\n" - " On Linux this is controlled by the vm.max_map_count. For example:\n" - " > sudo sysctl -w vm.max_map_count=262144\n"); - } -#else - MI_UNUSED(err); -#endif -} - // Commit/Decommit memory. // Usually commit is aligned liberal, while decommit is aligned conservative. // (but not for the reset version where we want commit to be conservative as well) @@ -933,7 +353,6 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ size_t csize; void* start = mi_os_page_align_areax(conservative, addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) - int err = 0; if (commit) { _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit _mi_stat_counter_increase(&stats->commit_calls, 1); @@ -942,56 +361,9 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ _mi_stat_decrease(&stats->committed, size); } - #if defined(_WIN32) - if (commit) { - // *is_zero = true; // note: if the memory was already committed, the call succeeds but the memory is not zero'd - void* p = VirtualAlloc(start, csize, MEM_COMMIT, PAGE_READWRITE); - err = (p == start ? 0 : GetLastError()); - } - else { - BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); - err = (ok ? 0 : GetLastError()); - } - #elif defined(__wasi__) - // WebAssembly guests can't control memory protection - #elif 0 && defined(MAP_FIXED) && !defined(__APPLE__) - // Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?) - if (commit) { - // commit: just change the protection - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) - const int fd = mi_unix_mmap_fd(); - void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); - if (p != start) { err = errno; } - } - #else - // Linux, macOSX and others. - if (commit) { - // commit: ensure we can access the area - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - #if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0 - // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) - // (on the other hand, MADV_FREE would be good enough.. 
it is just not reflected in the stats :-( ) - err = madvise(start, csize, MADV_DONTNEED); - #else - // decommit: just disable access (also used in debug and secure mode to trap on illegal access) - err = mprotect(start, csize, PROT_NONE); - if (err != 0) { err = errno; } - #endif - //#if defined(MADV_FREE_REUSE) - // while ((err = mi_madvise(start, csize, MADV_FREE_REUSE)) != 0 && errno == EAGAIN) { errno = 0; } - //#endif - } - #endif + int err = _mi_prim_commit(start, csize, commit); if (err != 0) { _mi_warning_message("%s error: start: %p, csize: 0x%zx, err: %i\n", commit ? "commit" : "decommit", start, csize, err); - mi_mprotect_hint(err); } mi_assert_internal(err == 0); return (err == 0); @@ -1033,39 +405,11 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) } #endif -#if defined(_WIN32) - // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory - void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); - mi_assert_internal(p == start); - #if 1 - if (p == start && start != NULL) { - VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set - } - #endif - if (p != start) return false; -#else -#if defined(MADV_FREE) - static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); - int oadvice = (int)mi_atomic_load_relaxed(&advice); - int err; - while ((err = mi_madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; - if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { - // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on - mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); - err = mi_madvise(start, csize, MADV_DONTNEED); - } -#elif defined(__wasi__) - int err = 0; -#else - int err = mi_madvise(start, csize, MADV_DONTNEED); -#endif + int err = _mi_prim_reset(start, csize); if (err != 0) { - _mi_warning_message("madvise reset error: start: %p, csize: 0x%zx, errno: %i\n", start, csize, errno); + _mi_warning_message("madvise reset error: start: %p, csize: 0x%zx, errno: %i\n", start, csize, err); } - //mi_assert(err == 0); - if (err != 0) return false; -#endif - return true; + return (err == 0); } // Signal to the OS that the address range is no longer in use @@ -1097,20 +441,9 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ - int err = 0; -#ifdef _WIN32 - DWORD oldprotect = 0; - BOOL ok = VirtualProtect(start, csize, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); - err = (ok ? 0 : GetLastError()); -#elif defined(__wasi__) - err = 0; -#else - err = mprotect(start, csize, protect ? 
PROT_NONE : (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } -#endif + int err = _mi_prim_protect(start,csize,protect); if (err != 0) { _mi_warning_message("mprotect error: start: %p, csize: 0x%zx, err: %i\n", start, csize, err); - mi_mprotect_hint(err); } return (err == 0); } @@ -1125,115 +458,12 @@ bool _mi_os_unprotect(void* addr, size_t size) { -bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { - // page align conservatively within the range - mi_assert_internal(oldsize > newsize && p != NULL); - if (oldsize < newsize || p == NULL) return false; - if (oldsize == newsize) return true; - - // oldsize and newsize should be page aligned or we cannot shrink precisely - void* addr = (uint8_t*)p + newsize; - size_t size = 0; - void* start = mi_os_page_align_area_conservative(addr, oldsize - newsize, &size); - if (size == 0 || start != addr) return false; - -#ifdef _WIN32 - // we cannot shrink on windows, but we can decommit - return _mi_os_decommit(start, size, stats); -#else - return mi_os_mem_free(start, size, true, stats); -#endif -} - - /* ---------------------------------------------------------------------------- Support for allocating huge OS pages (1Gib) that are reserved up-front and possibly associated with a specific NUMA node. (use `numa_node>=0`) -----------------------------------------------------------------------------*/ #define MI_HUGE_OS_PAGE_SIZE (MI_GiB) -#if defined(_WIN32) && (MI_INTPTR_SIZE >= 8) -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) -{ - mi_assert_internal(size%MI_GiB == 0); - mi_assert_internal(addr != NULL); - const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; - - mi_win_enable_large_os_pages(); - - MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; - // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - static bool mi_huge_pages_available = true; - if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { - params[0].Type.Type = MiMemExtendedParameterAttributeFlags; - params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - ULONG param_count = 1; - if (numa_node >= 0) { - param_count++; - params[1].Type.Type = MiMemExtendedParameterNumaNode; - params[1].Arg.ULong = (unsigned)numa_node; - } - SIZE_T psize = size; - void* base = addr; - NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); - if (err == 0 && base != NULL) { - return base; - } - else { - // fall back to regular large pages - mi_huge_pages_available = false; // don't try further huge pages - _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); - } - } - // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation - if (pVirtualAlloc2 != NULL && numa_node >= 0) { - params[0].Type.Type = MiMemExtendedParameterNumaNode; - params[0].Arg.ULong = (unsigned)numa_node; - return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); - } - - // otherwise use regular virtual alloc on older windows - return VirtualAlloc(addr, size, flags, PAGE_READWRITE); -} - -#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) -#include -#ifndef MPOL_PREFERRED -#define MPOL_PREFERRED 1 -#endif -#if defined(SYS_mbind) -static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, 
unsigned flags) { - return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); -} -#else -static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { - MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags); - return 0; -} -#endif -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%MI_GiB == 0); - bool is_large = true; - void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); - if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes - unsigned long numa_mask = (1UL << numa_node); - // TODO: does `mbind` work correctly for huge OS pages? should we - // use `set_mempolicy` before calling mmap instead? - // see: - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); - if (err != 0) { - _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d: %s\n", numa_node, strerror(errno)); - } - } - return p; -} -#else -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); - return NULL; -} -#endif #if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages @@ -1252,10 +482,10 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { if (start == 0) { // Initialize the start address after the 32TiB area start = ((uintptr_t)32 << 40); // 32TiB virtual start address -#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB -#endif + #endif } end = start + size; mi_assert_internal(end % MI_SEGMENT_SIZE == 0); @@ -1288,7 +518,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse for (page = 0; page < pages; page++) { // allocate a page void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); - void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); + void* p = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); // Did we succeed at a contiguous address? if (p != addr) { @@ -1340,113 +570,6 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- Support NUMA aware allocation -----------------------------------------------------------------------------*/ -#ifdef _WIN32 -static size_t mi_os_numa_nodex(void) { - USHORT numa_node = 0; - if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { - // Extended API is supported - MI_PROCESSOR_NUMBER pnum; - (*pGetCurrentProcessorNumberEx)(&pnum); - USHORT nnode = 0; - BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); - if (ok) { numa_node = nnode; } - } - else if (pGetNumaProcessorNode != NULL) { - // Vista or earlier, use older API that is limited to 64 processors. 
Issue #277 - DWORD pnum = GetCurrentProcessorNumber(); - UCHAR nnode = 0; - BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); - if (ok) { numa_node = nnode; } - } - return numa_node; -} - -static size_t mi_os_numa_node_countx(void) { - ULONG numa_max = 0; - GetNumaHighestNodeNumber(&numa_max); - // find the highest node number that has actual processors assigned to it. Issue #282 - while(numa_max > 0) { - if (pGetNumaNodeProcessorMaskEx != NULL) { - // Extended API is supported - GROUP_AFFINITY affinity; - if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) { - if (affinity.Mask != 0) break; // found the maximum non-empty node - } - } - else { - // Vista or earlier, use older API that is limited to 64 processors. - ULONGLONG mask; - if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { - if (mask != 0) break; // found the maximum non-empty node - }; - } - // max node was invalid or had no processor assigned, try again - numa_max--; - } - return ((size_t)numa_max + 1); -} -#elif defined(__linux__) -#include // getcpu -#include // access - -static size_t mi_os_numa_nodex(void) { -#ifdef SYS_getcpu - unsigned long node = 0; - unsigned long ncpu = 0; - long err = syscall(SYS_getcpu, &ncpu, &node, NULL); - if (err != 0) return 0; - return node; -#else - return 0; -#endif -} -static size_t mi_os_numa_node_countx(void) { - char buf[128]; - unsigned node = 0; - for(node = 0; node < 256; node++) { - // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) - snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf,R_OK) != 0) break; - } - return (node+1); -} -#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000 -static size_t mi_os_numa_nodex(void) { - domainset_t dom; - size_t node; - int policy; - if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul; - for (node = 0; node < MAXMEMDOM; node++) { - if (DOMAINSET_ISSET(node, &dom)) return node; - } - return 0ul; -} -static size_t mi_os_numa_node_countx(void) { - size_t ndomains = 0; - size_t len = sizeof(ndomains); - if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul; - return ndomains; -} -#elif defined(__DragonFly__) -static size_t mi_os_numa_nodex(void) { - // TODO: DragonFly does not seem to provide any userland means to get this information. 
- return 0ul; -} -static size_t mi_os_numa_node_countx(void) { - size_t ncpus = 0, nvirtcoresperphys = 0; - size_t len = sizeof(size_t); - if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul; - if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul; - return nvirtcoresperphys * ncpus; -} -#else -static size_t mi_os_numa_nodex(void) { - return 0; -} -static size_t mi_os_numa_node_countx(void) { - return 1; -} -#endif _Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count @@ -1458,7 +581,7 @@ size_t _mi_os_numa_node_count_get(void) { count = (size_t)ncount; } else { - count = mi_os_numa_node_countx(); // or detect dynamically + count = _mi_prim_numa_node_count(); // or detect dynamically if (count == 0) count = 1; } mi_atomic_store_release(&_mi_numa_node_count, count); // save it @@ -1472,7 +595,7 @@ int _mi_os_numa_node_get(mi_os_tld_t* tld) { size_t numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - size_t numa_node = mi_os_numa_nodex(); + size_t numa_node = _mi_prim_numa_node(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } return (int)numa_node; } diff --git a/src/prim/prim-unix.c b/src/prim/prim-unix.c new file mode 100644 index 00000000..fdbf8e9e --- /dev/null +++ b/src/prim/prim-unix.c @@ -0,0 +1,483 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#ifndef _DEFAULT_SOURCE +#define _DEFAULT_SOURCE // ensure mmap flags are defined +#endif + +#if defined(__sun) +// illumos provides new mman.h api when any of these are defined +// otherwise the old api based on caddr_t which predates the void pointers one. +// stock solaris provides only the former, chose to atomically to discard those +// flags only here rather than project wide tough. 
+#undef _XOPEN_SOURCE +#undef _POSIX_C_SOURCE +#endif + +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" +#include "prim.h" + +#include // mmap +#include // sysconf + +#if defined(__linux__) + #include + #include + #if defined(__GLIBC__) + #include // linux mmap flags + #else + #include + #endif +#elif defined(__APPLE__) + #include + #if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR + #include + #endif +#elif defined(__FreeBSD__) || defined(__DragonFly__) + #include + #if __FreeBSD_version >= 1200000 + #include + #include + #endif + #include +#endif + +//--------------------------------------------- +// init +//--------------------------------------------- + +static bool unix_detect_overcommit(void) { + bool os_overcommit = true; +#if defined(__linux__) + int fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd >= 0) { + char buf[32]; + ssize_t nread = read(fd, &buf, sizeof(buf)); + close(fd); + // + // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE) + if (nread >= 1) { + os_overcommit = (buf[0] == '0' || buf[0] == '1'); + } + } +#elif defined(__FreeBSD__) + int val = 0; + size_t olen = sizeof(val); + if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { + os_overcommit = (val != 0); + } +#else + // default: overcommit is true +#endif + return os_overcommit; +} + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) { + long psize = sysconf(_SC_PAGESIZE); + if (psize > 0) { + config->page_size = (size_t)psize; + config->alloc_granularity = (size_t)psize; + } + config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this? + config->has_overcommit = unix_detect_overcommit(); + config->must_free_whole = false; // mmap can free in parts +} + + +//--------------------------------------------- +// free +//--------------------------------------------- + +void _mi_prim_free(void* addr, size_t size ) { + bool err = (munmap(addr, size) == -1); + if (err) { + _mi_warning_message("unable to release OS memory: %s, addr: %p, size: %zu\n", strerror(errno), addr, size); + } +} + + +//--------------------------------------------- +// mmap +//--------------------------------------------- + +static int unix_madvise(void* addr, size_t size, int advice) { + #if defined(__sun) + return madvise((caddr_t)addr, size, advice); // Solaris needs cast (issue #520) + #else + return madvise(addr, size, advice); + #endif +} + +static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { + MI_UNUSED(try_alignment); + #if defined(MAP_ALIGNED) // BSD + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + size_t n = mi_bsr(try_alignment); + if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB + flags |= MAP_ALIGNED(n); + void* p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #elif defined(MAP_ALIGN) // Solaris + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + void* p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + #endif + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations + if (addr == NULL) { + void* hint 
= _mi_os_get_aligned_hint(try_alignment, size); + if (hint != NULL) { + void* p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #endif + // regular mmap + void* p = mmap(addr, size, protect_flags, flags, fd, 0); + if (p!=MAP_FAILED) return p; + // failed to allocate + return NULL; +} + +static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { + void* p = NULL; + #if !defined(MAP_ANONYMOUS) + #define MAP_ANONYMOUS MAP_ANON + #endif + #if !defined(MAP_NORESERVE) + #define MAP_NORESERVE 0 + #endif + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + int fd = -1; + if (_mi_os_has_overcommit()) { + flags |= MAP_NORESERVE; + } + #if defined(PROT_MAX) + protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD + #endif + #if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) + int os_tag = (int)mi_option_get(mi_option_os_tag); + if (os_tag < 100 || os_tag > 255) { os_tag = 100; } + fd = VM_MAKE_TAG(os_tag); + #endif + // huge page allocation + if ((large_only || _mi_os_use_large_page(size, try_alignment)) && allow_large) { + static _Atomic(size_t) large_page_try_ok; // = 0; + size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // If the OS is not configured for large OS pages, or the user does not have + // enough permission, the `mmap` will always fail (but it might also fail for other reasons). + // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times + // to avoid too many failing calls to mmap. + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + } + else { + int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux + int lfd = fd; + #ifdef MAP_ALIGNED_SUPER + lflags |= MAP_ALIGNED_SUPER; + #endif + #ifdef MAP_HUGETLB + lflags |= MAP_HUGETLB; + #endif + #ifdef MAP_HUGE_1GB + static bool mi_huge_pages_available = true; + if ((size % MI_GiB) == 0 && mi_huge_pages_available) { + lflags |= MAP_HUGE_1GB; + } + else + #endif + { + #ifdef MAP_HUGE_2MB + lflags |= MAP_HUGE_2MB; + #endif + } + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; + #endif + if (large_only || lflags != flags) { + // try large OS page allocation + *is_large = true; + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + #ifdef MAP_HUGE_1GB + if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + mi_huge_pages_available = false; // don't try huge 1GiB pages again + _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); + lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + } + #endif + if (large_only) return p; + if (p == NULL) { + mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations + } + } + } + } + // regular allocation + if (p == NULL) { + *is_large = false; + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, flags, fd); + if (p != NULL) { + #if defined(MADV_HUGEPAGE) + // Many Linux systems don't allow MAP_HUGETLB but they support instead + // transparent huge pages (THP). 
Generally, it is not required to call `madvise` with MADV_HUGE + // though since properly aligned allocations will already use large pages if available + // in that case -- in particular for our large regions (in `memory.c`). + // However, some systems only allow THP if called with explicit `madvise`, so + // when large OS pages are enabled for mimalloc, we call `madvise` anyways. + if (allow_large && _mi_os_use_large_page(size, try_alignment)) { + if (unix_madvise(p, size, MADV_HUGEPAGE) == 0) { + *is_large = true; // possibly + }; + } + #elif defined(__sun) + if (allow_large && _mi_os_use_large_page(size, try_alignment)) { + struct memcntl_mha cmd = {0}; + cmd.mha_pagesize = large_os_page_size; + cmd.mha_cmd = MHA_MAPSIZE_VA; + if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { + *is_large = true; + } + } + #endif + } + } + if (p == NULL) { + _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large); + } + return p; +} + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. +void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(commit || !allow_large); + mi_assert_internal(try_alignment > 0); + + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + return unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); +} + + +//--------------------------------------------- +// Commit/Reset +//--------------------------------------------- + +static void unix_mprotect_hint(int err) { + #if defined(__linux__) && (MI_SECURE>=2) // guard page around every mimalloc page + if (err == ENOMEM) { + _mi_warning_message("The next warning may be caused by a low memory map limit.\n" + " On Linux this is controlled by the vm.max_map_count -- maybe increase it?\n" + " For example: sudo sysctl -w vm.max_map_count=262144\n"); + } + #else + MI_UNUSED(err); + #endif +} + + +int _mi_prim_commit(void* start, size_t size, bool commit) { + /* + #if 0 && defined(MAP_FIXED) && !defined(__APPLE__) + // Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?) + if (commit) { + // commit: just change the protection + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } + else { + // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) + const int fd = mi_unix_mmap_fd(); + void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); + if (p != start) { err = errno; } + } + #else + */ + int err = 0; + if (commit) { + // commit: ensure we can access the area + err = mprotect(start, size, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } + else { + #if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0 + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + // (on the other hand, MADV_FREE would be good enough.. 
it is just not reflected in the stats :-( ) + err = unix_madvise(start, size, MADV_DONTNEED); + #else + // decommit: just disable access (also used in debug and secure mode to trap on illegal access) + err = mprotect(start, size, PROT_NONE); + if (err != 0) { err = errno; } + #endif + } + unix_mprotect_hint(err); + return err; +} + +int _mi_prim_reset(void* start, size_t size) { + #if defined(MADV_FREE) + static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); + int oadvice = (int)mi_atomic_load_relaxed(&advice); + int err; + while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; + if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { + // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on + mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); + err = unix_madvise(start, size, MADV_DONTNEED); + } + #else + int err = unix_madvise(start, csize, MADV_DONTNEED); + #endif + return err; +} + +int _mi_prim_protect(void* start, size_t size, bool protect) { + int err = mprotect(start, size, protect ? PROT_NONE : (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + unix_mprotect_hint(err); + return err; +} + + + +//--------------------------------------------- +// Huge page allocation +//--------------------------------------------- + +#if (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) + +#include + +#ifndef MPOL_PREFERRED +#define MPOL_PREFERRED 1 +#endif + +#if defined(SYS_mbind) +static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); +} +#else +static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags); + return 0; +} +#endif + +void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) { + bool is_large = true; + void* p = unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + if (p == NULL) return NULL; + if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + unsigned long numa_mask = (1UL << numa_node); + // TODO: does `mbind` work correctly for huge OS pages? should we + // use `set_mempolicy` before calling mmap instead? + // see: + long err = mi_prim_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + if (err != 0) { + _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d: %s\n", numa_node, strerror(errno)); + } + } + return p; +} + +#else + +void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) { + MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); + return NULL; +} + +#endif + +//--------------------------------------------- +// NUMA nodes +//--------------------------------------------- + +#if defined(__linux__) + +#include // getcpu +#include // access + +size_t _mi_prim_numa_node(void) { + #ifdef SYS_getcpu + unsigned long node = 0; + unsigned long ncpu = 0; + long err = syscall(SYS_getcpu, &ncpu, &node, NULL); + if (err != 0) return 0; + return node; + #else + return 0; + #endif +} + +size_t _mi_prim_numa_node_count(void) { + char buf[128]; + unsigned node = 0; + for(node = 0; node < 256; node++) { + // enumerate node entries -- todo: it there a more efficient way to do this? 
(but ensure there is no allocation) + snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); + if (access(buf,R_OK) != 0) break; + } + return (node+1); +} + +#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000 + +size_t mi_prim_numa_node(void) { + domainset_t dom; + size_t node; + int policy; + if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul; + for (node = 0; node < MAXMEMDOM; node++) { + if (DOMAINSET_ISSET(node, &dom)) return node; + } + return 0ul; +} + +size_t _mi_prim_numa_node_count(void) { + size_t ndomains = 0; + size_t len = sizeof(ndomains); + if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul; + return ndomains; +} + +#elif defined(__DragonFly__) + +size_t _mi_prim_numa_node(void) { + // TODO: DragonFly does not seem to provide any userland means to get this information. + return 0ul; +} + +size_t _mi_prim_numa_node_count(void) { + size_t ncpus = 0, nvirtcoresperphys = 0; + size_t len = sizeof(size_t); + if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul; + if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul; + return nvirtcoresperphys * ncpus; +} + +#else + +size_t _mi_prim_numa_node(void) { + return 0; +} + +size_t _mi_prim_numa_node_count(void) { + return 1; +} + +#endif diff --git a/src/prim/prim-wasi.c b/src/prim/prim-wasi.c new file mode 100644 index 00000000..431113e3 --- /dev/null +++ b/src/prim/prim-wasi.c @@ -0,0 +1,154 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" +#include "prim.h" + +//--------------------------------------------- +// Initialize +//--------------------------------------------- + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) { + config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB + config->alloc_granularity = 16; + config->has_overcommit = false; + config->must_free_whole = true; +} + +//--------------------------------------------- +// Free +//--------------------------------------------- + +void _mi_prim_free(void* addr, size_t size ) { + MI_UNUSED(addr); MI_UNUSED(size); + // wasi heap cannot be shrunk +} + + +//--------------------------------------------- +// Allocation: sbrk or memory_grow +//--------------------------------------------- + +#if defined(MI_USE_SBRK) + static void* mi_memory_grow( size_t size ) { + void* p = sbrk(size); + if (p == (void*)(-1)) return NULL; + #if !defined(__wasi__) // on wasi this is always zero initialized already (?) + memset(p,0,size); + #endif + return p; + } +#elif defined(__wasi__) + static void* mi_memory_grow( size_t size ) { + size_t base = (size > 0 ? 
__builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size())) + : __builtin_wasm_memory_size(0)); + if (base == SIZE_MAX) return NULL; + return (void*)(base * _mi_os_page_size()); + } +#endif + +#if defined(MI_USE_PTHREADS) +static pthread_mutex_t mi_heap_grow_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif + +static void* mi_prim_mem_grow(size_t size, size_t try_alignment) { + void* p = NULL; + if (try_alignment <= 1) { + // `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now) + #if defined(MI_USE_PTHREADS) + pthread_mutex_lock(&mi_heap_grow_mutex); + #endif + p = mi_memory_grow(size); + #if defined(MI_USE_PTHREADS) + pthread_mutex_unlock(&mi_heap_grow_mutex); + #endif + } + else { + void* base = NULL; + size_t alloc_size = 0; + // to allocate aligned use a lock to try to avoid thread interaction + // between getting the current size and actual allocation + // (also, `sbrk` is not thread safe in general) + #if defined(MI_USE_PTHREADS) + pthread_mutex_lock(&mi_heap_grow_mutex); + #endif + { + void* current = mi_memory_grow(0); // get current size + if (current != NULL) { + void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space + alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size()); + base = mi_memory_grow(alloc_size); + } + } + #if defined(MI_USE_PTHREADS) + pthread_mutex_unlock(&mi_heap_grow_mutex); + #endif + if (base != NULL) { + p = mi_align_up_ptr(base, try_alignment); + if ((uint8_t*)p + size > (uint8_t*)base + alloc_size) { + // another thread used wasm_memory_grow/sbrk in-between and we do not have enough + // space after alignment. Give up (and waste the space as we cannot shrink :-( ) + // (in `mi_os_mem_alloc_aligned` this will fall back to overallocation to align) + p = NULL; + } + } + } + if (p == NULL) { + _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); + errno = ENOMEM; + return NULL; + } + mi_assert_internal( try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); + return p; +} + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. 
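+// As an illustrative sketch (caller-side code, not part of this file): the generic
+// layer in `os.c` checks the result and falls back to over-allocation when the
+// alignment hint was not honored, roughly:
+//
+//   bool is_large = false;
+//   void* p = _mi_prim_alloc(size, alignment, true /* commit */, false, &is_large);
+//   if (p != NULL && ((uintptr_t)p % alignment) != 0) {
+//     // over-allocate `size + alignment` and align within it
+//     // (see `mi_os_mem_alloc_aligned` in `os.c`)
+//   }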
+void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) { + MI_UNUSED(allow_large); + *is_large = false; + return mi_prim_mem_grow(size, try_alignment); +} + + +//--------------------------------------------- +// Commit/Reset/Protect +//--------------------------------------------- + +int _mi_prim_commit(void* addr, size_t size, bool commit) { + MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(commit); + return 0; +} + +int _mi_prim_reset(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + +int _mi_prim_protect(void* addr, size_t size, bool protect) { + MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); + return 0; +} + + +//--------------------------------------------- +// Huge pages and NUMA nodes +//--------------------------------------------- + +void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) { + MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); + return NULL; +} + +size_t _mi_prim_numa_node(void) { + return 0; +} + +size_t _mi_prim_numa_node_count(void) { + return 1; +} diff --git a/src/prim/prim-windows.c b/src/prim/prim-windows.c new file mode 100644 index 00000000..44ce9e8e --- /dev/null +++ b/src/prim/prim-windows.c @@ -0,0 +1,385 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" +#include "prim.h" +#include // strerror + +#ifdef _MSC_VER +#pragma warning(disable:4996) // strerror +#endif + +//--------------------------------------------- +// Dynamically bind Windows API points for portability +//--------------------------------------------- + +// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. +// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) +// NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) +// We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's. 
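+// (The `MI_`-prefixed definitions below mirror the layout of the SDK types
+// `MEM_EXTENDED_PARAMETER`, `MEM_ADDRESS_REQUIREMENTS`, and `PROCESSOR_NUMBER`,
+// so the same binary layout is passed to the dynamically bound functions even
+// when building against an SDK that does not declare them.)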
+typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { + MiMemExtendedParameterInvalidType = 0, + MiMemExtendedParameterAddressRequirements, + MiMemExtendedParameterNumaNode, + MiMemExtendedParameterPartitionHandle, + MiMemExtendedParameterUserPhysicalHandle, + MiMemExtendedParameterAttributeFlags, + MiMemExtendedParameterMax +} MI_MEM_EXTENDED_PARAMETER_TYPE; + +typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { + struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; + union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg; +} MI_MEM_EXTENDED_PARAMETER; + +typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { + PVOID LowestStartingAddress; + PVOID HighestEndingAddress; + SIZE_T Alignment; +} MI_MEM_ADDRESS_REQUIREMENTS; + +#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 + +#include +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +static PVirtualAlloc2 pVirtualAlloc2 = NULL; +static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; + +// Similarly, GetNumaProcesorNodeEx is only supported since Windows 7 +typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; + +typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); +typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); +typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); +typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); +static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; +static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; +static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; +static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; + +//--------------------------------------------- +// Enable large page support dynamically (if possible) +//--------------------------------------------- + +static bool win_enable_large_os_pages(size_t* large_page_size) +{ + static bool large_initialized = false; + if (large_initialized) return (_mi_os_large_page_size() > 0); + large_initialized = true; + + // Try to see if large OS pages are supported + // To use large pages on Windows, we first need access permission + // Set "Lock pages in memory" permission in the group policy editor + // + unsigned long err = 0; + HANDLE token = NULL; + BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); + if (ok) { + TOKEN_PRIVILEGES tp; + ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); + if (ok) { + tp.PrivilegeCount = 1; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); + if (ok) { + err = GetLastError(); + ok = (err == ERROR_SUCCESS); + if (ok && large_page_size != NULL) { + *large_page_size = GetLargePageMinimum(); + } + } + } + CloseHandle(token); + } + if (!ok) { + if (err == 0) err = GetLastError(); + _mi_warning_message("cannot enable large OS page support, error %lu\n", err); + } + return (ok!=0); +} + + +//--------------------------------------------- +// Initialize +//--------------------------------------------- + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) +{ + 
config->has_overcommit = false; + config->must_free_whole = true; + // get the page size + SYSTEM_INFO si; + GetSystemInfo(&si); + if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; } + if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; } + // get the VirtualAlloc2 function + HINSTANCE hDll; + hDll = LoadLibrary(TEXT("kernelbase.dll")); + if (hDll != NULL) { + // use VirtualAlloc2FromApp if possible as it is available to Windows store apps + pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); + if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + FreeLibrary(hDll); + } + // NtAllocateVirtualMemoryEx is used for huge page allocation + hDll = LoadLibrary(TEXT("ntdll.dll")); + if (hDll != NULL) { + pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); + FreeLibrary(hDll); + } + // Try to use Win7+ numa API + hDll = LoadLibrary(TEXT("kernel32.dll")); + if (hDll != NULL) { + pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); + pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); + pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); + pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); + FreeLibrary(hDll); + } + if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + win_enable_large_os_pages(&config->large_page_size); + } +} + + +//--------------------------------------------- +// Free +//--------------------------------------------- + +void _mi_prim_free(void* addr, size_t size ) { + DWORD errcode = 0; + bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); + if (err) { errcode = GetLastError(); } + if (errcode == ERROR_INVALID_ADDRESS) { + // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside + // the memory region returned by VirtualAlloc; in that case we need to free using + // the start of the region. 
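+    // Query the enclosing region and only release from its AllocationBase when
+    // `addr` lies past it by less than MI_SEGMENT_SIZE, so an unrelated region is never freed.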
+ MEMORY_BASIC_INFORMATION info = { 0 }; + VirtualQuery(addr, &info, sizeof(info)); + if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) { + errcode = 0; + err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0); + if (err) { errcode = GetLastError(); } + } + } + if (errcode != 0) { + _mi_warning_message("unable to release OS memory: error code 0x%x, addr: %p, size: %zu\n", errcode, addr, size); + } +} + + +//--------------------------------------------- +// VirtualAlloc +//--------------------------------------------- + +static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignment, DWORD flags) { + #if (MI_INTPTR_SIZE >= 8) + // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations + if (addr == NULL) { + void* hint = _mi_os_get_aligned_hint(try_alignment,size); + if (hint != NULL) { + void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE); + if (p != NULL) return p; + _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags); + // fall through on error + } + } + #endif + // on modern Windows try use VirtualAlloc2 for aligned allocation + if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { + MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; + reqs.Alignment = try_alignment; + MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; + param.Type.Type = MiMemExtendedParameterAddressRequirements; + param.Arg.Pointer = &reqs; + void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); + if (p != NULL) return p; + _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); + // fall through on error + } + // last resort + return VirtualAlloc(addr, size, flags, PAGE_READWRITE); +} + +static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { + mi_assert_internal(!(large_only && !allow_large)); + static _Atomic(size_t) large_page_try_ok; // = 0; + void* p = NULL; + // Try to allocate large OS pages (2MiB) if allowed or required. + if ((large_only || _mi_os_use_large_page(size, try_alignment)) + && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { + size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. + // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + } + else { + // large OS pages must always reserve and commit. + *is_large = true; + p = win_virtual_alloc_prim(addr, size, try_alignment, flags | MEM_LARGE_PAGES); + if (large_only) return p; + // fall back to non-large page allocation on error (`p == NULL`). 
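+      // (the release below makes the next ~10 allocations skip the expensive
+      // MEM_LARGE_PAGES attempt -- see the `large_page_try_ok` check above)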
+ if (p == NULL) { + mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations + } + } + } + // Fall back to regular page allocation + if (p == NULL) { + *is_large = ((flags&MEM_LARGE_PAGES) != 0); + p = win_virtual_alloc_prim(addr, size, try_alignment, flags); + } + if (p == NULL) { + _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); + } + return p; +} + +void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(commit || !allow_large); + mi_assert_internal(try_alignment > 0); + int flags = MEM_RESERVE; + if (commit) { flags |= MEM_COMMIT; } + return win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); +} + + +//--------------------------------------------- +// Commit/Reset/Protect +//--------------------------------------------- +#ifdef _MSC_VER +#pragma warning(disable:6250) // suppress warning calling VirtualFree without MEM_RELEASE (for decommit) +#endif + +int _mi_prim_commit(void* addr, size_t size, bool commit) { + if (commit) { + void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE); + return (p == addr ? 0 : (int)GetLastError()); + } + else { + BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT); + return (ok ? 0 : (int)GetLastError()); + } +} + +int _mi_prim_reset(void* addr, size_t size) { + void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); + mi_assert_internal(p == addr); + #if 1 + if (p == addr && addr != NULL) { + VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory from the working set + } + #endif + return (p == addr ? 0 : (int)GetLastError()); +} + +int _mi_prim_protect(void* addr, size_t size, bool protect) { + DWORD oldprotect = 0; + BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); + return (ok ? 
0 : (int)GetLastError()); +} + + +//--------------------------------------------- +// Huge page allocation +//--------------------------------------------- + +void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) +{ + const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; + + win_enable_large_os_pages(NULL); + + MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; + // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages + static bool mi_huge_pages_available = true; + if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { + params[0].Type.Type = MiMemExtendedParameterAttributeFlags; + params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; + ULONG param_count = 1; + if (numa_node >= 0) { + param_count++; + params[1].Type.Type = MiMemExtendedParameterNumaNode; + params[1].Arg.ULong = (unsigned)numa_node; + } + SIZE_T psize = size; + void* base = addr; + NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + if (err == 0 && base != NULL) { + return base; + } + else { + // fall back to regular large pages + mi_huge_pages_available = false; // don't try further huge pages + _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); + } + } + // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation + if (pVirtualAlloc2 != NULL && numa_node >= 0) { + params[0].Type.Type = MiMemExtendedParameterNumaNode; + params[0].Arg.ULong = (unsigned)numa_node; + return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); + } + + // otherwise use regular virtual alloc on older windows + return VirtualAlloc(addr, size, flags, PAGE_READWRITE); +} + + +//--------------------------------------------- +// Numa nodes +//--------------------------------------------- + +size_t _mi_prim_numa_node(void) { + USHORT numa_node = 0; + if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { + // Extended API is supported + MI_PROCESSOR_NUMBER pnum; + (*pGetCurrentProcessorNumberEx)(&pnum); + USHORT nnode = 0; + BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); + if (ok) { numa_node = nnode; } + } + else if (pGetNumaProcessorNode != NULL) { + // Vista or earlier, use older API that is limited to 64 processors. Issue #277 + DWORD pnum = GetCurrentProcessorNumber(); + UCHAR nnode = 0; + BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); + if (ok) { numa_node = nnode; } + } + return numa_node; +} + +size_t _mi_prim_numa_node_count(void) { + ULONG numa_max = 0; + GetNumaHighestNodeNumber(&numa_max); + // find the highest node number that has actual processors assigned to it. Issue #282 + while(numa_max > 0) { + if (pGetNumaNodeProcessorMaskEx != NULL) { + // Extended API is supported + GROUP_AFFINITY affinity; + if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) { + if (affinity.Mask != 0) break; // found the maximum non-empty node + } + } + else { + // Vista or earlier, use older API that is limited to 64 processors. 
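+      // (`GetNumaNodeProcessorMask` reports the node's processors as a single
+      // 64-bit affinity mask, hence the 64-processor limit)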
+ ULONGLONG mask; + if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { + if (mask != 0) break; // found the maximum non-empty node + }; + } + // max node was invalid or had no processor assigned, try again + numa_max--; + } + return ((size_t)numa_max + 1); +} diff --git a/src/prim/prim.c b/src/prim/prim.c new file mode 100644 index 00000000..83b7abc1 --- /dev/null +++ b/src/prim/prim.c @@ -0,0 +1,18 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// Select the implementation of the primitives +// depending on the OS. + +#if defined(_WIN32) +#include "prim-windows.c" // VirtualAlloc (Windows) +#elif defined(__wasi__) +#define MI_USE_SBRK +#include "prim-wasi.h" // memory-grow or sbrk (Wasm) +#else +#include "prim-unix.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) +#endif diff --git a/src/prim/prim.h b/src/prim/prim.h new file mode 100644 index 00000000..ec001a9e --- /dev/null +++ b/src/prim/prim.h @@ -0,0 +1,64 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_PRIM_H +#define MIMALLOC_PRIM_H + +// note: on all primitive functions, we always get: +// addr != NULL and page aligned +// size > 0 and page aligned +// + +// OS memory configuration +typedef struct mi_os_mem_config_s { + size_t page_size; // 4KiB + size_t large_page_size; // 2MiB + size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) + bool has_overcommit; // can we reserve more memory than can be actually committed? + bool must_free_whole; // must allocated blocks free as a whole (false for mmap, true for VirtualAlloc) +} mi_os_mem_config_t; + +// Initialize +void _mi_prim_mem_init( mi_os_mem_config_t* config ); + +// Free OS memory +// pre: addr != NULL, size > 0 +void _mi_prim_free(void* addr, size_t size ); + +// Allocate OS memory. +// The `try_alignment` is just a hint and the returned pointer does not have to be aligned. +// return NULL on error. +// pre: !commit => !allow_large +// try_alignment >= _mi_os_page_size() and a power of 2 +void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large); + +// Commit memory. Returns error code or 0 on success. +int _mi_prim_commit(void* addr, size_t size, bool commit); + +// Reset memory. The range keeps being accessible but the content might be reset. +// Returns error code or 0 on success. +int _mi_prim_reset(void* addr, size_t size); + +// Protect memory. Returns error code or 0 on success. +int _mi_prim_protect(void* addr, size_t size, bool protect); + +// Allocate huge (1GiB) pages possibly associated with a NUMA node. +// pre: size > 0 and a multiple of 1GiB. +// addr is either NULL or an address hint. +// numa_node is either negative (don't care), or a numa node number. 
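+// Illustrative sketch of the caller side (the real driver is
+// `_mi_os_alloc_huge_os_pages` in `os.c`, which allocates one 1GiB page at a time
+// at contiguous addresses):
+//
+//   void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
+//   void* p    = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node);
+//   if (p != addr) { /* stop: allocation failed or was not contiguous */ }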
+void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node); + +// Return the current NUMA node +size_t _mi_prim_numa_node(void); + +// Return the number of logical NUMA nodes +size_t _mi_prim_numa_node_count(void); + + +#endif // MIMALLOC_PRIM_H + + diff --git a/src/prim/readme.md b/src/prim/readme.md new file mode 100644 index 00000000..14248496 --- /dev/null +++ b/src/prim/readme.md @@ -0,0 +1,6 @@ +This is the portability layer where all primitives needed from the OS are defined. + +- `prim.h`: API definition +- `prim.c`: Selects one of `prim-unix.c`, `prim-wasi.c`, or `prim-windows.c` depending on the host platform. + +Note: still work in progress, there may be other places in the sources that still depend on OS ifdef's. \ No newline at end of file From 69cb30a874858ab1d161731732453202b62e1e2b Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 14 Mar 2023 17:15:52 -0700 Subject: [PATCH 08/42] move process info into primitives --- src/prim/prim-unix.c | 108 ++++++++++++++++++++++++ src/prim/prim-wasi.c | 46 ++++++++++ src/prim/prim-windows.c | 72 ++++++++++++++++ src/prim/prim.h | 9 ++ src/stats.c | 182 +++------------------------------------- 5 files changed, 248 insertions(+), 169 deletions(-) diff --git a/src/prim/prim-unix.c b/src/prim/prim-unix.c index fdbf8e9e..297dc2a9 100644 --- a/src/prim/prim-unix.c +++ b/src/prim/prim-unix.c @@ -481,3 +481,111 @@ size_t _mi_prim_numa_node_count(void) { } #endif + +// ---------------------------------------------------------------- +// Clock +// ---------------------------------------------------------------- + +#include + +#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) + +mi_msecs_t _mi_prim_clock_now(void) { + struct timespec t; + #ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &t); + #else + clock_gettime(CLOCK_REALTIME, &t); + #endif + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); +} + +#else + +// low resolution timer +mi_msecs_t _mi_prim_clock_now(void) { + return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); +} + +#endif + + + + +//---------------------------------------------------------------- +// Process info +//---------------------------------------------------------------- + +#if defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__) +#include +#include +#include + +#if defined(__APPLE__) +#include +#endif + +#if defined(__HAIKU__) +#include +#endif + +static mi_msecs_t timeval_secs(const struct timeval* tv) { + return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); +} + +void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +{ + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); + *utime = timeval_secs(&rusage.ru_utime); + *stime = timeval_secs(&rusage.ru_stime); +#if !defined(__HAIKU__) + *page_faults = rusage.ru_majflt; +#endif + // estimate commit using our stats + *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + *current_rss = *current_commit; // estimate +#if defined(__HAIKU__) + // Haiku does not have (yet?) 
a way to + // get these stats per process + thread_info tid; + area_info mem; + ssize_t c; + get_thread_info(find_thread(0), &tid); + while (get_next_area_info(tid.team, &c, &mem) == B_OK) { + *peak_rss += mem.ram_size; + } + *page_faults = 0; +#elif defined(__APPLE__) + *peak_rss = rusage.ru_maxrss; // BSD reports in bytes + struct mach_task_basic_info info; + mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + *current_rss = (size_t)info.resident_size; + } +#else + *peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB +#endif +} + +#else + +#ifndef __wasi__ +// WebAssembly instances are not processes +#pragma message("define a way to get process info") +#endif + +void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +{ + *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + *peak_rss = *peak_commit; + *current_rss = *current_commit; + *page_faults = 0; + *utime = 0; + *stime = 0; +} + +#endif + diff --git a/src/prim/prim-wasi.c b/src/prim/prim-wasi.c index 431113e3..964a7ede 100644 --- a/src/prim/prim-wasi.c +++ b/src/prim/prim-wasi.c @@ -152,3 +152,49 @@ size_t _mi_prim_numa_node(void) { size_t _mi_prim_numa_node_count(void) { return 1; } + + +//---------------------------------------------------------------- +// Clock +//---------------------------------------------------------------- + +#include + +#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) + +mi_msecs_t _mi_prim_clock_now(void) { + struct timespec t; + #ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &t); + #else + clock_gettime(CLOCK_REALTIME, &t); + #endif + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); +} + +#else + +// low resolution timer +mi_msecs_t _mi_prim_clock_now(void) { + return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); +} + +#endif + + +//---------------------------------------------------------------- +// Process info +//---------------------------------------------------------------- + +void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +{ + *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + *peak_rss = *peak_commit; + *current_rss = *current_commit; + *page_faults = 0; + *utime = 0; + *stime = 0; +} + + diff --git a/src/prim/prim-windows.c b/src/prim/prim-windows.c index 44ce9e8e..b83082e3 100644 --- a/src/prim/prim-windows.c +++ b/src/prim/prim-windows.c @@ -383,3 +383,75 @@ size_t _mi_prim_numa_node_count(void) { } return ((size_t)numa_max + 1); } + + +//---------------------------------------------------------------- +// Clock +//---------------------------------------------------------------- + +static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { + static LARGE_INTEGER mfreq; // = 0 + if (mfreq.QuadPart == 0LL) { + LARGE_INTEGER f; + QueryPerformanceFrequency(&f); + mfreq.QuadPart = f.QuadPart/1000LL; + if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; + } + return 
(mi_msecs_t)(t.QuadPart / mfreq.QuadPart); +} + +mi_msecs_t _mi_prim_clock_now(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + return mi_to_msecs(t); +} + + +//---------------------------------------------------------------- +// Process info +//---------------------------------------------------------------- + +#include +#include + +static mi_msecs_t filetime_msecs(const FILETIME* ftime) { + ULARGE_INTEGER i; + i.LowPart = ftime->dwLowDateTime; + i.HighPart = ftime->dwHighDateTime; + mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds + return msecs; +} + +typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); +static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; + +void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +{ + FILETIME ct; + FILETIME ut; + FILETIME st; + FILETIME et; + GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); + *utime = filetime_msecs(&ut); + *stime = filetime_msecs(&st); + + // load psapi on demand + if (pGetProcessMemoryInfo == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll")); + if (hDll != NULL) { + pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo"); + } + } + + // get process info + PROCESS_MEMORY_COUNTERS info; + memset(&info, 0, sizeof(info)); + if (pGetProcessMemoryInfo != NULL) { + pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); + } + *current_rss = (size_t)info.WorkingSetSize; + *peak_rss = (size_t)info.PeakWorkingSetSize; + *current_commit = (size_t)info.PagefileUsage; + *peak_commit = (size_t)info.PeakPagefileUsage; + *page_faults = (size_t)info.PageFaultCount; +} diff --git a/src/prim/prim.h b/src/prim/prim.h index ec001a9e..14f75156 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -58,6 +58,15 @@ size_t _mi_prim_numa_node(void); // Return the number of logical NUMA nodes size_t _mi_prim_numa_node_count(void); +// High resolution clock +mi_msecs_t _mi_prim_clock_now(void); + +// Return process information +void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, + size_t* current_rss, size_t* peak_rss, + size_t* current_commit, size_t* peak_commit, size_t* page_faults); + + #endif // MIMALLOC_PRIM_H diff --git a/src/stats.c b/src/stats.c index 84d677fa..c9b3bb95 100644 --- a/src/stats.c +++ b/src/stats.c @@ -7,8 +7,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-atomic.h" +#include "prim/prim.h" -#include // fputs, stderr +#include // snprintf #include // memset #if defined(_MSC_VER) && (_MSC_VER < 1920) @@ -291,8 +292,6 @@ static void mi_cdecl mi_buffered_out(const char* msg, void* arg) { // Print statistics //------------------------------------------------------------ -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults); - static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept { // wrap the output function to be line buffered char buf[256]; @@ -337,15 +336,15 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); _mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count()); - mi_msecs_t elapsed; - mi_msecs_t user_time; - mi_msecs_t sys_time; + size_t elapsed; + size_t user_time; + size_t sys_time; size_t current_rss; size_t peak_rss; size_t current_commit; size_t peak_commit; size_t page_faults; - mi_stat_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); + mi_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); @@ -404,47 +403,13 @@ void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { // ---------------------------------------------------------------- // Basic timer for convenience; use milli-seconds to avoid doubles // ---------------------------------------------------------------- -#ifdef _WIN32 -#include -static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { - static LARGE_INTEGER mfreq; // = 0 - if (mfreq.QuadPart == 0LL) { - LARGE_INTEGER f; - QueryPerformanceFrequency(&f); - mfreq.QuadPart = f.QuadPart/1000LL; - if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; - } - return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); -} - -mi_msecs_t _mi_clock_now(void) { - LARGE_INTEGER t; - QueryPerformanceCounter(&t); - return mi_to_msecs(t); -} -#else -#include -#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) -mi_msecs_t _mi_clock_now(void) { - struct timespec t; - #ifdef CLOCK_MONOTONIC - clock_gettime(CLOCK_MONOTONIC, &t); - #else - clock_gettime(CLOCK_REALTIME, &t); - #endif - return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); -} -#else -// low resolution timer -mi_msecs_t _mi_clock_now(void) { - return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); -} -#endif -#endif - static mi_msecs_t mi_clock_diff; +mi_msecs_t _mi_clock_now(void) { + return _mi_prim_clock_now(); +} + mi_msecs_t _mi_clock_start(void) { if (mi_clock_diff == 0.0) { mi_msecs_t t0 = _mi_clock_now(); @@ -463,130 +428,9 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { // Basic process statistics // -------------------------------------------------------- -#if defined(_WIN32) -#include -#include - -static mi_msecs_t filetime_msecs(const FILETIME* ftime) { - ULARGE_INTEGER i; - i.LowPart = ftime->dwLowDateTime; - i.HighPart = 
ftime->dwHighDateTime; - mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds - return msecs; -} - -typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); -static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - FILETIME ct; - FILETIME ut; - FILETIME st; - FILETIME et; - GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); - *utime = filetime_msecs(&ut); - *stime = filetime_msecs(&st); - - // load psapi on demand - if (pGetProcessMemoryInfo == NULL) { - HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll")); - if (hDll != NULL) { - pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo"); - } - } - - // get process info - PROCESS_MEMORY_COUNTERS info; - memset(&info, 0, sizeof(info)); - if (pGetProcessMemoryInfo != NULL) { - pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - } - *current_rss = (size_t)info.WorkingSetSize; - *peak_rss = (size_t)info.PeakWorkingSetSize; - *current_commit = (size_t)info.PagefileUsage; - *peak_commit = (size_t)info.PeakPagefileUsage; - *page_faults = (size_t)info.PageFaultCount; -} - -#elif !defined(__wasi__) && (defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__)) -#include -#include -#include - -#if defined(__APPLE__) -#include -#endif - -#if defined(__HAIKU__) -#include -#endif - -static mi_msecs_t timeval_secs(const struct timeval* tv) { - return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); -} - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - struct rusage rusage; - getrusage(RUSAGE_SELF, &rusage); - *utime = timeval_secs(&rusage.ru_utime); - *stime = timeval_secs(&rusage.ru_stime); -#if !defined(__HAIKU__) - *page_faults = rusage.ru_majflt; -#endif - // estimate commit using our stats - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *current_rss = *current_commit; // estimate -#if defined(__HAIKU__) - // Haiku does not have (yet?) 
a way to - // get these stats per process - thread_info tid; - area_info mem; - ssize_t c; - get_thread_info(find_thread(0), &tid); - while (get_next_area_info(tid.team, &c, &mem) == B_OK) { - *peak_rss += mem.ram_size; - } - *page_faults = 0; -#elif defined(__APPLE__) - *peak_rss = rusage.ru_maxrss; // BSD reports in bytes - struct mach_task_basic_info info; - mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; - if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { - *current_rss = (size_t)info.resident_size; - } -#else - *peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB -#endif -} - -#else -#ifndef __wasi__ -// WebAssembly instances are not processes -#pragma message("define a way to get process info") -#endif - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *peak_rss = *peak_commit; - *current_rss = *current_commit; - *page_faults = 0; - *utime = 0; - *stime = 0; -} -#endif - - mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept { - mi_msecs_t elapsed = 0; + mi_msecs_t elapsed = _mi_clock_end(mi_process_start); mi_msecs_t utime = 0; mi_msecs_t stime = 0; size_t current_rss0 = 0; @@ -594,8 +438,8 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s size_t current_commit0 = 0; size_t peak_commit0 = 0; size_t page_faults0 = 0; - mi_stat_process_info(&elapsed,&utime, &stime, ¤t_rss0, &peak_rss0, ¤t_commit0, &peak_commit0, &page_faults0); - if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX)); + _mi_prim_process_info(&utime, &stime, ¤t_rss0, &peak_rss0, ¤t_commit0, &peak_commit0, &page_faults0); + if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX)); if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX)); if (system_msecs!=NULL) *system_msecs = (stime < 0 ? 0 : (stime < (mi_msecs_t)PTRDIFF_MAX ? 
(size_t)stime : PTRDIFF_MAX)); if (current_rss!=NULL) *current_rss = current_rss0; From 10f62eb5a19e412c8f0691cad64fe3b59b9a346d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 14 Mar 2023 18:10:00 -0700 Subject: [PATCH 09/42] add c primitives, move getenv into primitives --- include/mimalloc-internal.h | 9 ++ src/alloc-posix.c | 2 +- src/options.c | 166 ++++++++++-------------------------- src/prim/prim-unix.c | 66 ++++++++++++++ src/prim/prim-wasi.c | 30 +++++++ src/prim/prim-windows.c | 55 +++++++++++- src/prim/prim.h | 7 ++ 7 files changed, 209 insertions(+), 126 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index ee26bfb8..ce6e6a6b 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -166,6 +166,15 @@ bool _mi_free_delayed_block(mi_block_t* block); void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size); +// option.c, c primitives +char _mi_toupper(char c); +int _mi_strnicmp(const char* s, const char* t, size_t n); +void _mi_strlcpy(char* dest, const char* src, size_t dest_size); +void _mi_strlcat(char* dest, const char* src, size_t dest_size); +size_t _mi_strlen(const char* s); +size_t _mi_strnlen(const char* s, size_t max_len); + + #if MI_DEBUG>1 bool _mi_page_is_valid(mi_page_t* page); #endif diff --git a/src/alloc-posix.c b/src/alloc-posix.c index e6505f29..f0cfe629 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -149,7 +149,7 @@ int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept { else { *buf = mi_strdup(p); if (*buf==NULL) return ENOMEM; - if (size != NULL) *size = strlen(p); + if (size != NULL) *size = _mi_strlen(p); } return 0; } diff --git a/src/options.c b/src/options.c index 44319a42..a93ea89e 100644 --- a/src/options.c +++ b/src/options.c @@ -7,17 +7,11 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-atomic.h" +#include "prim/prim.h" // mi_prim_out_stderr #include -#include // strtol -#include // strncpy, strncat, strlen, strstr -#include // toupper #include -#ifdef _MSC_VER -#pragma warning(disable:4996) // strncpy, strncat -#endif - static long mi_max_error_count = 16; // stop outputting errors after this (use < 0 for no limit) static long mi_max_warning_count = 16; // stop outputting warnings after this (use < 0 for no limit) @@ -170,41 +164,11 @@ void mi_option_disable(mi_option_t option) { mi_option_set_enabled(option,false); } - static void mi_cdecl mi_out_stderr(const char* msg, void* arg) { MI_UNUSED(arg); - if (msg == NULL) return; - #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output - // after the main thread closes so we use direct console output. 
- if (!_mi_preloading()) { - // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console - static HANDLE hcon = INVALID_HANDLE_VALUE; - static bool hconIsConsole; - if (hcon == INVALID_HANDLE_VALUE) { - CONSOLE_SCREEN_BUFFER_INFO sbi; - hcon = GetStdHandle(STD_ERROR_HANDLE); - hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); - } - const size_t len = strlen(msg); - if (len > 0 && len < UINT32_MAX) { - DWORD written = 0; - if (hconIsConsole) { - WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); - } - else if (hcon != INVALID_HANDLE_VALUE) { - // use direct write if stderr was redirected - WriteFile(hcon, msg, (DWORD)len, &written, NULL); - } - else { - // finally fall back to fputs after all - fputs(msg, stderr); - } - } + if (msg != NULL && msg[0] != 0) { + _mi_prim_out_stderr(msg); } - #else - fputs(msg, stderr); - #endif } // Since an output function can be registered earliest in the `main` @@ -221,7 +185,7 @@ static void mi_cdecl mi_out_buf(const char* msg, void* arg) { MI_UNUSED(arg); if (msg==NULL) return; if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; - size_t n = strlen(msg); + size_t n = _mi_strlen(msg); if (n==0) return; // claim space size_t start = mi_atomic_add_acq_rel(&out_len, n); @@ -358,7 +322,7 @@ void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { } static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) { - if (prefix != NULL && strlen(prefix) <= 32 && !_mi_is_main_thread()) { + if (prefix != NULL && _mi_strnlen(prefix,33) <= 32 && !_mi_is_main_thread()) { char tprefix[64]; snprintf(tprefix, sizeof(tprefix), "%sthread 0x%llx: ", prefix, (unsigned long long)_mi_thread_id()); mi_vfprintf(out, arg, tprefix, fmt, args); @@ -463,8 +427,20 @@ void _mi_error_message(int err, const char* fmt, ...) { // -------------------------------------------------------- // Initialize options by checking the environment // -------------------------------------------------------- +char _mi_toupper(char c) { + if (c >= 'a' && c <= 'z') return (c - 'a' + 'A'); + else return c; +} -static void mi_strlcpy(char* dest, const char* src, size_t dest_size) { +int _mi_strnicmp(const char* s, const char* t, size_t n) { + if (n == 0) return 0; + for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { + if (_mi_toupper(*s) != _mi_toupper(*t)) break; + } + return (n == 0 ? 
0 : *s - *t); +} + +void _mi_strlcpy(char* dest, const char* src, size_t dest_size) { if (dest==NULL || src==NULL || dest_size == 0) return; // copy until end of src, or when dest is (almost) full while (*src != 0 && dest_size > 1) { @@ -475,7 +451,7 @@ static void mi_strlcpy(char* dest, const char* src, size_t dest_size) { *dest = 0; } -static void mi_strlcat(char* dest, const char* src, size_t dest_size) { +void _mi_strlcat(char* dest, const char* src, size_t dest_size) { if (dest==NULL || src==NULL || dest_size == 0) return; // find end of string in the dest buffer while (*dest != 0 && dest_size > 1) { @@ -483,7 +459,21 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { dest_size--; } // and catenate - mi_strlcpy(dest, src, dest_size); + _mi_strlcpy(dest, src, dest_size); +} + +size_t _mi_strlen(const char* s) { + if (s==NULL) return 0; + size_t len = 0; + while(s[len] != 0) { len++; } + return len; +} + +size_t _mi_strnlen(const char* s, size_t max_len) { + if (s==NULL) return 0; + size_t len = 0; + while(s[len] != 0 && len < max_len) { len++; } + return len; } #ifdef MI_NO_GETENV @@ -494,94 +484,28 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { return false; } #else -#if defined _WIN32 -// On Windows use GetEnvironmentVariable instead of getenv to work -// reliably even when this is invoked before the C runtime is initialized. -// i.e. when `_mi_preloading() == true`. -// Note: on windows, environment names are not case sensitive. -#include static bool mi_getenv(const char* name, char* result, size_t result_size) { - result[0] = 0; - size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); - return (len > 0 && len < result_size); -} -#elif !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0) -// On Posix systemsr use `environ` to acces environment variables -// even before the C runtime is initialized. -#if defined(__APPLE__) && defined(__has_include) && __has_include() -#include -static char** mi_get_environ(void) { - return (*_NSGetEnviron()); -} -#else -extern char** environ; -static char** mi_get_environ(void) { - return environ; + if (name==NULL || result == NULL || result_size < 64) return false; + return _mi_prim_getenv(name,result,result_size); } #endif -static int mi_strnicmp(const char* s, const char* t, size_t n) { - if (n == 0) return 0; - for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { - if (toupper(*s) != toupper(*t)) break; - } - return (n == 0 ? 0 : *s - *t); -} -static bool mi_getenv(const char* name, char* result, size_t result_size) { - if (name==NULL) return false; - const size_t len = strlen(name); - if (len == 0) return false; - char** env = mi_get_environ(); - if (env == NULL) return false; - // compare up to 256 entries - for (int i = 0; i < 256 && env[i] != NULL; i++) { - const char* s = env[i]; - if (mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive - // found it - mi_strlcpy(result, s + len + 1, result_size); - return true; - } - } - return false; -} -#else -// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime -static bool mi_getenv(const char* name, char* result, size_t result_size) { - // cannot call getenv() when still initializing the C runtime. - if (_mi_preloading()) return false; - const char* s = getenv(name); - if (s == NULL) { - // we check the upper case name too. 
- char buf[64+1];
- size_t len = strlen(name);
- if (len >= sizeof(buf)) len = sizeof(buf) - 1;
- for (size_t i = 0; i < len; i++) {
- buf[i] = toupper(name[i]);
- }
- buf[len] = 0;
- s = getenv(buf);
- }
- if (s != NULL && strlen(s) < result_size) {
- mi_strlcpy(result, s, result_size);
- return true;
- }
- else {
- return false;
- }
-}
-#endif // !MI_USE_ENVIRON
-#endif // !MI_NO_GETENV
+
+
+// TODO: implement ourselves to reduce dependencies on the C runtime
+#include <stdlib.h> // strtol
+#include <string.h> // strstr
+
 static void mi_option_init(mi_option_desc_t* desc) {
 // Read option value from the environment
 char buf[64+1];
- mi_strlcpy(buf, "mimalloc_", sizeof(buf));
- mi_strlcat(buf, desc->name, sizeof(buf));
+ _mi_strlcpy(buf, "mimalloc_", sizeof(buf));
+ _mi_strlcat(buf, desc->name, sizeof(buf));
 char s[64+1];
 if (mi_getenv(buf, s, sizeof(s))) {
- size_t len = strlen(s);
+ size_t len = _mi_strnlen(s,64);
 if (len >= sizeof(buf)) len = sizeof(buf) - 1;
 for (size_t i = 0; i < len; i++) {
- buf[i] = (char)toupper(s[i]);
+ buf[i] = _mi_toupper(s[i]);
 }
 buf[len] = 0;
 if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) {
diff --git a/src/prim/prim-unix.c b/src/prim/prim-unix.c
index 297dc2a9..495b274e 100644
--- a/src/prim/prim-unix.c
+++ b/src/prim/prim-unix.c
@@ -589,3 +589,69 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current
 
 #endif
 
+
+//----------------------------------------------------------------
+// Output
+//----------------------------------------------------------------
+
+void _mi_prim_out_stderr( const char* msg ) {
+ fputs(msg,stderr);
+}
+
+
+//----------------------------------------------------------------
+// Environment
+//----------------------------------------------------------------
+
+#if !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0)
+// On Posix systems use `environ` to access environment variables
+// even before the C runtime is initialized.
+#if defined(__APPLE__) && defined(__has_include) && __has_include(<crt_externs.h>)
+#include <crt_externs.h>
+static char** mi_get_environ(void) {
+ return (*_NSGetEnviron());
+}
+#else
+extern char** environ;
+static char** mi_get_environ(void) {
+ return environ;
+}
+#endif
+bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
+ if (name==NULL) return false;
+ const size_t len = _mi_strlen(name);
+ if (len == 0) return false;
+ char** env = mi_get_environ();
+ if (env == NULL) return false;
+ // compare up to 256 entries
+ for (int i = 0; i < 256 && env[i] != NULL; i++) {
+ const char* s = env[i];
+ if (_mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive
+ // found it
+ _mi_strlcpy(result, s + len + 1, result_size);
+ return true;
+ }
+ }
+ return false;
+}
+#else
+// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime
+bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
+ // cannot call getenv() when still initializing the C runtime.
+ if (_mi_preloading()) return false;
+ const char* s = getenv(name);
+ if (s == NULL) {
+ // we check the upper case name too. 
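+ // (getenv is case sensitive, so e.g. `MIMALLOC_VERBOSE` is still found when only the upper case name was set)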
+ char buf[64+1]; + size_t len = _mi_strnlen(name,sizeof(buf)-1); + for (size_t i = 0; i < len; i++) { + buf[i] = _mi_toupper(name[i]); + } + buf[len] = 0; + s = getenv(buf); + } + if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false; + _mi_strlcpy(result, s, result_size); + return true; +} +#endif // !MI_USE_ENVIRON diff --git a/src/prim/prim-wasi.c b/src/prim/prim-wasi.c index 964a7ede..7de491e4 100644 --- a/src/prim/prim-wasi.c +++ b/src/prim/prim-wasi.c @@ -197,4 +197,34 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current *stime = 0; } +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- +void _mi_prim_out_stderr( const char* msg ) { + fputs(msg,stderr); +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + // cannot call getenv() when still initializing the C runtime. + if (_mi_preloading()) return false; + const char* s = getenv(name); + if (s == NULL) { + // we check the upper case name too. + char buf[64+1]; + size_t len = _mi_strnlen(name,sizeof(buf)-1); + for (size_t i = 0; i < len; i++) { + buf[i] = _mi_toupper(name[i]); + } + buf[len] = 0; + s = getenv(buf); + } + if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false; + _mi_strlcpy(result, s, result_size); + return true; +} \ No newline at end of file diff --git a/src/prim/prim-windows.c b/src/prim/prim-windows.c index b83082e3..97b1936f 100644 --- a/src/prim/prim-windows.c +++ b/src/prim/prim-windows.c @@ -10,6 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-atomic.h" #include "prim.h" #include // strerror +#include // fputs, stderr #ifdef _MSC_VER #pragma warning(disable:4996) // strerror @@ -407,10 +408,6 @@ mi_msecs_t _mi_prim_clock_now(void) { } -//---------------------------------------------------------------- -// Process info -//---------------------------------------------------------------- - #include #include @@ -455,3 +452,53 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current *peak_commit = (size_t)info.PeakPagefileUsage; *page_faults = (size_t)info.PageFaultCount; } + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +void _mi_prim_out_stderr( const char* msg ) +{ + // on windows with redirection, the C runtime cannot handle locale dependent output + // after the main thread closes so we use direct console output. 
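+ // (while the C runtime is still pre-loading, no output is written at all)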
+ if (!_mi_preloading()) {
+ // _cputs(msg); // _cputs cannot be used as it aborts if it fails to lock the console
+ static HANDLE hcon = INVALID_HANDLE_VALUE;
+ static bool hconIsConsole;
+ if (hcon == INVALID_HANDLE_VALUE) {
+ CONSOLE_SCREEN_BUFFER_INFO sbi;
+ hcon = GetStdHandle(STD_ERROR_HANDLE);
+ hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi));
+ }
+ const size_t len = strlen(msg);
+ if (len > 0 && len < UINT32_MAX) {
+ DWORD written = 0;
+ if (hconIsConsole) {
+ WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL);
+ }
+ else if (hcon != INVALID_HANDLE_VALUE) {
+ // use direct write if stderr was redirected
+ WriteFile(hcon, msg, (DWORD)len, &written, NULL);
+ }
+ else {
+ // finally fall back to fputs after all
+ fputs(msg, stderr);
+ }
+ }
+ }
+}
+
+
+//----------------------------------------------------------------
+// Environment
+//----------------------------------------------------------------
+
+// On Windows use GetEnvironmentVariable instead of getenv to work
+// reliably even when this is invoked before the C runtime is initialized.
+// i.e. when `_mi_preloading() == true`.
+// Note: on Windows, environment names are not case sensitive.
+bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
+ result[0] = 0;
+ size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size);
+ return (len > 0 && len < result_size);
+}
\ No newline at end of file
diff --git a/src/prim/prim.h b/src/prim/prim.h
index 14f75156..734d02bf 100644
--- a/src/prim/prim.h
+++ b/src/prim/prim.h
@@ -66,6 +66,13 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime,
 size_t* current_rss, size_t* peak_rss,
 size_t* current_commit, size_t* peak_commit, size_t* page_faults);
 
+// Default stderr output.
+// msg != NULL && strlen(msg) > 0
+void _mi_prim_out_stderr( const char* msg );
+
+// Get an environment variable.
+// name != NULL, result != NULL, result_size >= 64
+bool _mi_prim_getenv(const char* name, char* result, size_t result_size);
 
 #endif // MIMALLOC_PRIM_H
 
From 4348a05d0f92d3197cbd26acdf641551d125121f Mon Sep 17 00:00:00 2001
From: Daan Leijen 
Date: Tue, 14 Mar 2023 18:24:38 -0700
Subject: [PATCH 10/42] small fixes

---
 src/options.c | 3 ++-
 src/prim/prim-unix.c | 8 +++++++-
 src/prim/prim-wasi.c | 10 ++++++++--
 src/prim/prim-windows.c | 8 ++++++--
 src/prim/prim.h | 23 +++++++++--------------
 5 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/src/options.c b/src/options.c
index a93ea89e..4c76ae41 100644
--- a/src/options.c
+++ b/src/options.c
@@ -9,7 +9,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc-atomic.h" #include "prim/prim.h" // mi_prim_out_stderr -#include +#include // FILE +#include // abort #include diff --git a/src/prim/prim-unix.c b/src/prim/prim-unix.c index 495b274e..ce24c24a 100644 --- a/src/prim/prim-unix.c +++ b/src/prim/prim-unix.c @@ -504,7 +504,13 @@ mi_msecs_t _mi_prim_clock_now(void) { // low resolution timer mi_msecs_t _mi_prim_clock_now(void) { - return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); + #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) + return (mi_msecs_t)clock(); + #elif (CLOCKS_PER_SEC < 1000) + return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); + #else + return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); + #endif } #endif diff --git a/src/prim/prim-wasi.c b/src/prim/prim-wasi.c index 7de491e4..c37b0847 100644 --- a/src/prim/prim-wasi.c +++ b/src/prim/prim-wasi.c @@ -110,7 +110,7 @@ static void* mi_prim_mem_grow(size_t size, size_t try_alignment) { // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) { - MI_UNUSED(allow_large); + MI_UNUSED(allow_large); MI_UNUSED(commit); *is_large = false; return mi_prim_mem_grow(size, try_alignment); } @@ -176,7 +176,13 @@ mi_msecs_t _mi_prim_clock_now(void) { // low resolution timer mi_msecs_t _mi_prim_clock_now(void) { - return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); + #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) + return (mi_msecs_t)clock(); + #elif (CLOCKS_PER_SEC < 1000) + return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); + #else + return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); + #endif } #endif diff --git a/src/prim/prim-windows.c b/src/prim/prim-windows.c index 97b1936f..f6f1a9db 100644 --- a/src/prim/prim-windows.c +++ b/src/prim/prim-windows.c @@ -408,6 +408,10 @@ mi_msecs_t _mi_prim_clock_now(void) { } +//---------------------------------------------------------------- +// Process Info +//---------------------------------------------------------------- + #include #include @@ -470,7 +474,7 @@ void _mi_prim_out_stderr( const char* msg ) hcon = GetStdHandle(STD_ERROR_HANDLE); hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); } - const size_t len = strlen(msg); + const size_t len = _mi_strlen(msg); if (len > 0 && len < UINT32_MAX) { DWORD written = 0; if (hconIsConsole) { @@ -501,4 +505,4 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { result[0] = 0; size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); return (len > 0 && len < result_size); -} \ No newline at end of file +} diff --git a/src/prim/prim.h b/src/prim/prim.h index 734d02bf..2cbbc85c 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -9,9 +9,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MIMALLOC_PRIM_H // note: on all primitive functions, we always get: -// addr != NULL and page aligned -// size > 0 and page aligned -// +// addr != NULL and page aligned +// size > 0 and page aligned + // OS memory configuration typedef struct mi_os_mem_config_s { @@ -26,12 +26,10 @@ typedef struct mi_os_mem_config_s { void _mi_prim_mem_init( mi_os_mem_config_t* config ); // Free OS memory -// pre: addr != NULL, size > 0 void _mi_prim_free(void* addr, size_t size ); -// Allocate OS memory. +// Allocate OS memory. Return NULL on error. // The `try_alignment` is just a hint and the returned pointer does not have to be aligned. -// return NULL on error. // pre: !commit => !allow_large // try_alignment >= _mi_os_page_size() and a power of 2 void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large); @@ -58,23 +56,20 @@ size_t _mi_prim_numa_node(void); // Return the number of logical NUMA nodes size_t _mi_prim_numa_node_count(void); -// High resolution clock +// Clock ticks mi_msecs_t _mi_prim_clock_now(void); -// Return process information +// Return process information (only for statistics) void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults); -// Default stderr output. -// msg != NULL && strlen(msg) > 0 +// Default stderr output. (only for warnings etc. with verbose enabled) +// msg != NULL && _mi_strlen(msg) > 0 void _mi_prim_out_stderr( const char* msg ); -// Get an environment variable. +// Get an environment variable. (only for options) // name != NULL, result != NULL, result_size >= 64 bool _mi_prim_getenv(const char* name, char* result, size_t result_size); - #endif // MIMALLOC_PRIM_H - - From 3579d3b8617c858a3d2f9a6c25b0c13d34cf811a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 14 Mar 2023 19:38:45 -0700 Subject: [PATCH 11/42] move mi_thread_id to primitives --- include/mimalloc-internal.h | 102 +-------------------------------- src/alloc.c | 8 +-- src/init.c | 5 ++ src/prim/prim.h | 110 +++++++++++++++++++++++++++++++++++- 4 files changed, 119 insertions(+), 106 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index ce6e6a6b..6e6da9f3 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -74,6 +74,7 @@ extern mi_decl_cache_align const mi_page_t _mi_page_empty; bool _mi_is_main_thread(void); size_t _mi_current_thread_count(void); bool _mi_preloading(void); // true while the C runtime is not ready +mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; // os.c size_t _mi_os_page_size(void); @@ -754,107 +755,6 @@ static inline size_t _mi_os_numa_node_count(void) { } -// ------------------------------------------------------------------- -// Getting the thread id should be performant as it is called in the -// fast path of `_mi_free` and we specialize for various platforms. -// We only require _mi_threadid() to return a unique id for each thread. -// ------------------------------------------------------------------- -#if defined(_WIN32) - -#define WIN32_LEAN_AND_MEAN -#include -static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { - // Windows: works on Intel and ARM in both 32- and 64-bit - return (uintptr_t)NtCurrentTeb(); -} - -// We use assembly for a fast thread id on the main platforms. 
The TLS layout depends on -// both the OS and libc implementation so we use specific tests for each main platform. -// If you test on another platform and it works please send a PR :-) -// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. -#elif defined(__GNUC__) && ( \ - (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ - || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \ - || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ - || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ - || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ - ) - -static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { - void* res; - const size_t ofs = (slot*sizeof(void*)); - #if defined(__i386__) - __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS - #elif defined(__APPLE__) && defined(__x86_64__) - __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS - #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) - __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI - #elif defined(__x86_64__) - __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS - #elif defined(__arm__) - void** tcb; MI_UNUSED(ofs); - __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); - res = tcb[slot]; - #elif defined(__aarch64__) - void** tcb; MI_UNUSED(ofs); - #if defined(__APPLE__) // M1, issue #343 - __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); - #else - __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); - #endif - res = tcb[slot]; - #endif - return res; -} - -// setting a tls slot is only used on macOS for now -static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { - const size_t ofs = (slot*sizeof(void*)); - #if defined(__i386__) - __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS - #elif defined(__APPLE__) && defined(__x86_64__) - __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS - #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) - __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI - #elif defined(__x86_64__) - __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS - #elif defined(__arm__) - void** tcb; MI_UNUSED(ofs); - __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); - tcb[slot] = value; - #elif defined(__aarch64__) - void** tcb; MI_UNUSED(ofs); - #if defined(__APPLE__) // M1, issue #343 - __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); - #else - __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); - #endif - tcb[slot] = value; - #endif -} - -static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { - #if defined(__BIONIC__) - // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id - // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86 - return (uintptr_t)mi_tls_slot(1); - #else - // in all our other targets, slot 0 is the thread id - // glibc: 
https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h - // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36 - return (uintptr_t)mi_tls_slot(0); - #endif -} - -#else - -// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms). -static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { - return (uintptr_t)&_mi_heap_default; -} - -#endif - // ----------------------------------------------------------------------- // Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero) diff --git a/src/alloc.c b/src/alloc.c index 04c4c48c..d8276df9 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -11,10 +11,10 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-atomic.h" +#include "prim/prim.h" // _mi_prim_thread_id() - -#include // memset, strlen -#include // malloc, exit +#include // memset, strlen (for mi_strdup) +#include // malloc, abort #define MI_IN_ALLOC_C #include "alloc-override.c" @@ -536,7 +536,7 @@ void mi_free(void* p) mi_attr_noexcept { if mi_unlikely(p == NULL) return; mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); - const bool is_local= (_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); + const bool is_local= (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); mi_page_t* const page = _mi_segment_page_of(segment, p); if mi_likely(is_local) { // thread-local free? diff --git a/src/init.c b/src/init.c index a2a6be75..dd555030 100644 --- a/src/init.c +++ b/src/init.c @@ -6,6 +6,7 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" +#include "prim/prim.h" #include // memcpy, memset #include // atexit @@ -103,6 +104,10 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { }; +mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { + return _mi_prim_thread_id(); +} + // the thread-local default heap for allocation mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; diff --git a/src/prim/prim.h b/src/prim/prim.h index 2cbbc85c..2d951930 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -12,7 +12,6 @@ terms of the MIT license. A copy of the license can be found in the file // addr != NULL and page aligned // size > 0 and page aligned - // OS memory configuration typedef struct mi_os_mem_config_s { size_t page_size; // 4KiB @@ -72,4 +71,113 @@ void _mi_prim_out_stderr( const char* msg ); // name != NULL, result != NULL, result_size >= 64 bool _mi_prim_getenv(const char* name, char* result, size_t result_size); + +//------------------------------------------------------------------- +// Thread id +// +// Getting the thread id should be performant as it is called in the +// fast path of `_mi_free` and we specialize for various platforms as +// inlined definitions. Regular code should call `init.c:_mi_thread_id()`. +// We only require _mi_prim_thread_id() to return a unique id for each thread. 
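+// (on Windows this is the TEB address, the main unix platforms read a TLS slot directly,
+//  and otherwise the address of a thread-local variable is used)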
+//------------------------------------------------------------------- + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; + +#if defined(_WIN32) + +#define WIN32_LEAN_AND_MEAN +#include +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + // Windows: works on Intel and ARM in both 32- and 64-bit + return (uintptr_t)NtCurrentTeb(); +} + +// We use assembly for a fast thread id on the main platforms. The TLS layout depends on +// both the OS and libc implementation so we use specific tests for each main platform. +// If you test on another platform and it works please send a PR :-) +// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. +#elif defined(__GNUC__) && ( \ + (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ + || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \ + || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ + || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + ) + +static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { + void* res; + const size_t ofs = (slot*sizeof(void*)); + #if defined(__i386__) + __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + res = tcb[slot]; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + res = tcb[slot]; + #endif + return res; +} + +// setting a tls slot is only used on macOS for now +static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { + const size_t ofs = (slot*sizeof(void*)); + #if defined(__i386__) + __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + tcb[slot] = value; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + tcb[slot] = value; + #endif +} + +static inline mi_threadid_t 
_mi_prim_thread_id(void) mi_attr_noexcept { + #if defined(__BIONIC__) + // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id + // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86 + return (uintptr_t)mi_tls_slot(1); + #else + // in all our other targets, slot 0 is the thread id + // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h + // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36 + return (uintptr_t)mi_tls_slot(0); + #endif +} + +#else + +// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms). +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + return (uintptr_t)&_mi_heap_default; +} + +#endif + + #endif // MIMALLOC_PRIM_H From 9b110090b2f79ab202c01bd806c867122f9d729f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 14 Mar 2023 20:35:00 -0700 Subject: [PATCH 12/42] move threadid and mi_get_default_heap to primitives --- include/mimalloc-internal.h | 94 ++--------------------------------- src/alloc-aligned.c | 27 +++++----- src/alloc.c | 38 +++++++------- src/heap.c | 14 ++++-- src/init.c | 6 +-- src/os.c | 4 +- src/page.c | 5 +- src/prim/prim.h | 99 +++++++++++++++++++++++++++++++++++-- src/random.c | 2 + 9 files changed, 152 insertions(+), 137 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 6e6da9f3..5843f60f 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -73,8 +73,9 @@ extern mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_cache_align const mi_page_t _mi_page_empty; bool _mi_is_main_thread(void); size_t _mi_current_thread_count(void); -bool _mi_preloading(void); // true while the C runtime is not ready +bool _mi_preloading(void); // true while the C runtime is not ready mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; +mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap // os.c size_t _mi_os_page_size(void); @@ -329,93 +330,11 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot } -/* ---------------------------------------------------------------------------------------- -The thread local default heap: `_mi_get_default_heap` returns the thread local heap. -On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a -__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures -that the storage will always be available (allocated on the thread stacks). -On some platforms though we cannot use that when overriding `malloc` since the underlying -TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. -We try to circumvent this in an efficient way: -- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the - loader itself calls `malloc` even before the modules are initialized. -- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). 
-- DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323) +/*---------------------------------------------------------------------------------------- + Heap functions ------------------------------------------------------------------------------------------- */ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap -extern bool _mi_process_is_initialized; -mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap - -#if defined(MI_MALLOC_OVERRIDE) -#if defined(__APPLE__) // macOS -#define MI_TLS_SLOT 89 // seems unused? -// #define MI_TLS_RECURSE_GUARD 1 -// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) -// see -#elif defined(__OpenBSD__) -// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) -// see -#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) -// #elif defined(__DragonFly__) -// #warning "mimalloc is not working correctly on DragonFly yet." -// #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) -#elif defined(__ANDROID__) -// See issue #381 -#define MI_TLS_PTHREAD -#endif -#endif - -#if defined(MI_TLS_SLOT) -static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept; // forward declaration -#elif defined(MI_TLS_PTHREAD_SLOT_OFS) -static inline mi_heap_t** mi_tls_pthread_heap_slot(void) { - pthread_t self = pthread_self(); - #if defined(__DragonFly__) - if (self==NULL) { - mi_heap_t* pheap_main = _mi_heap_main_get(); - return &pheap_main; - } - #endif - return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); -} -#elif defined(MI_TLS_PTHREAD) -extern pthread_key_t _mi_heap_default_key; -#endif - -// Default heap to allocate from (if not using TLS- or pthread slots). -// Do not use this directly but use through `mi_heap_get_default()` (or the unchecked `mi_get_default_heap`). -// This thread local variable is only used when neither MI_TLS_SLOT, MI_TLS_PTHREAD, or MI_TLS_PTHREAD_SLOT_OFS are defined. -// However, on the Apple M1 we do use the address of this variable as the unique thread-id (issue #356). -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from - -static inline mi_heap_t* mi_get_default_heap(void) { -#if defined(MI_TLS_SLOT) - mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT); - if mi_unlikely(heap == NULL) { - #ifdef __GNUC__ - __asm(""); // prevent conditional load of the address of _mi_heap_empty - #endif - heap = (mi_heap_t*)&_mi_heap_empty; - } - return heap; -#elif defined(MI_TLS_PTHREAD_SLOT_OFS) - mi_heap_t* heap = *mi_tls_pthread_heap_slot(); - return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); -#elif defined(MI_TLS_PTHREAD) - mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); - return (mi_unlikely(heap == NULL) ? 
(mi_heap_t*)&_mi_heap_empty : heap); -#else - #if defined(MI_TLS_RECURSE_GUARD) - if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); - #endif - return _mi_heap_default; -#endif -} - -static inline bool mi_heap_is_default(const mi_heap_t* heap) { - return (heap == mi_get_default_heap()); -} static inline bool mi_heap_is_backing(const mi_heap_t* heap) { return (heap->tld->heap_backing == heap); @@ -443,11 +362,6 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si return heap->pages_free_direct[idx]; } -// Get the page belonging to a certain size class -static inline mi_page_t* _mi_get_free_small_page(size_t size) { - return _mi_heap_get_free_small_page(mi_get_default_heap(), size); -} - // Segment that contains the pointer // Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE), // and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it; diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 08ad9814..aa9b7bd0 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -7,8 +7,9 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" +#include "prim/prim.h" // mi_prim_get_default_heap -#include // memset +#include // memset // ------------------------------------------------------ // Aligned Allocation @@ -187,27 +188,27 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, } mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_malloc_aligned_at(mi_get_default_heap(), size, alignment, offset); + return mi_heap_malloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset); } mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_malloc_aligned(mi_get_default_heap(), size, alignment); + return mi_heap_malloc_aligned(mi_prim_get_default_heap(), size, alignment); } mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_zalloc_aligned_at(mi_get_default_heap(), size, alignment, offset); + return mi_heap_zalloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset); } mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_zalloc_aligned(mi_get_default_heap(), size, alignment); + return mi_heap_zalloc_aligned(mi_prim_get_default_heap(), size, alignment); } mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_calloc_aligned_at(mi_get_default_heap(), count, size, alignment, offset); + return mi_heap_calloc_aligned_at(mi_prim_get_default_heap(), count, size, alignment, offset); } mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_calloc_aligned(mi_get_default_heap(), count, size, alignment); + return mi_heap_calloc_aligned(mi_prim_get_default_heap(), count, size, alignment); } @@ -282,25 +283,25 @@ mi_decl_nodiscard void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_ } mi_decl_nodiscard void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { - return 
mi_heap_realloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); + return mi_heap_realloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset); } mi_decl_nodiscard void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { - return mi_heap_realloc_aligned(mi_get_default_heap(), p, newsize, alignment); + return mi_heap_realloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment); } mi_decl_nodiscard void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_rezalloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); + return mi_heap_rezalloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset); } mi_decl_nodiscard void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { - return mi_heap_rezalloc_aligned(mi_get_default_heap(), p, newsize, alignment); + return mi_heap_rezalloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment); } mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_recalloc_aligned_at(mi_get_default_heap(), p, newcount, size, alignment, offset); + return mi_heap_recalloc_aligned_at(mi_prim_get_default_heap(), p, newcount, size, alignment, offset); } mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment); + return mi_heap_recalloc_aligned(mi_prim_get_default_heap(), p, newcount, size, alignment); } diff --git a/src/alloc.c b/src/alloc.c index d8276df9..0bda4db8 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -106,7 +106,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, mi_track_malloc(p,size,zero); #if MI_STAT>1 if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif @@ -119,7 +119,7 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_h } mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { - return mi_heap_malloc_small(mi_get_default_heap(), size); + return mi_heap_malloc_small(mi_prim_get_default_heap(), size); } // The main allocation function @@ -135,7 +135,7 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z mi_track_malloc(p,size,zero); #if MI_STAT>1 if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif @@ -152,12 +152,12 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* } mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { - return mi_heap_malloc(mi_get_default_heap(), size); + return mi_heap_malloc(mi_prim_get_default_heap(), size); } // zero initialized small block mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { - return mi_heap_malloc_small_zero(mi_get_default_heap(), size, true); + return mi_heap_malloc_small_zero(mi_prim_get_default_heap(), size, true); } mi_decl_nodiscard extern inline mi_decl_restrict void* 
mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { @@ -165,7 +165,7 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* } mi_decl_nodiscard mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept { - return mi_heap_zalloc(mi_get_default_heap(),size); + return mi_heap_zalloc(mi_prim_get_default_heap(),size); } @@ -649,7 +649,7 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* } mi_decl_nodiscard mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { - return mi_heap_calloc(mi_get_default_heap(),count,size); + return mi_heap_calloc(mi_prim_get_default_heap(),count,size); } // Uninitialized `calloc` @@ -660,7 +660,7 @@ mi_decl_nodiscard extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, } mi_decl_nodiscard mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { - return mi_heap_mallocn(mi_get_default_heap(),count,size); + return mi_heap_mallocn(mi_prim_get_default_heap(),count,size); } // Expand (or shrink) in place (or fail) @@ -737,24 +737,24 @@ mi_decl_nodiscard void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, mi_decl_nodiscard void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_realloc(mi_get_default_heap(),p,newsize); + return mi_heap_realloc(mi_prim_get_default_heap(),p,newsize); } mi_decl_nodiscard void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { - return mi_heap_reallocn(mi_get_default_heap(),p,count,size); + return mi_heap_reallocn(mi_prim_get_default_heap(),p,count,size); } // Reallocate but free `p` on errors mi_decl_nodiscard void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_reallocf(mi_get_default_heap(),p,newsize); + return mi_heap_reallocf(mi_prim_get_default_heap(),p,newsize); } mi_decl_nodiscard void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_rezalloc(mi_get_default_heap(), p, newsize); + return mi_heap_rezalloc(mi_prim_get_default_heap(), p, newsize); } mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { - return mi_heap_recalloc(mi_get_default_heap(), p, count, size); + return mi_heap_recalloc(mi_prim_get_default_heap(), p, count, size); } @@ -775,7 +775,7 @@ mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const c } mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept { - return mi_heap_strdup(mi_get_default_heap(), s); + return mi_heap_strdup(mi_prim_get_default_heap(), s); } // `strndup` using mi_malloc @@ -792,7 +792,7 @@ mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const } mi_decl_nodiscard mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { - return mi_heap_strndup(mi_get_default_heap(),s,n); + return mi_heap_strndup(mi_prim_get_default_heap(),s,n); } #ifndef __wasi__ @@ -861,7 +861,7 @@ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) #endif mi_decl_nodiscard mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { - return mi_heap_realpath(mi_get_default_heap(),fname,resolved_name); + return mi_heap_realpath(mi_prim_get_default_heap(),fname,resolved_name); } #endif @@ -937,7 +937,7 @@ mi_decl_export mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t si } static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) { - return mi_heap_try_new(mi_get_default_heap(), size, 
nothrow); + return mi_heap_try_new(mi_prim_get_default_heap(), size, nothrow); } @@ -948,7 +948,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size } mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { - return mi_heap_alloc_new(mi_get_default_heap(), size); + return mi_heap_alloc_new(mi_prim_get_default_heap(), size); } @@ -964,7 +964,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, si } mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) { - return mi_heap_alloc_new_n(mi_get_default_heap(), size, count); + return mi_heap_alloc_new_n(mi_prim_get_default_heap(), size, count); } diff --git a/src/heap.c b/src/heap.c index 94fed2b5..b12a9962 100644 --- a/src/heap.c +++ b/src/heap.c @@ -9,6 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-internal.h" #include "mimalloc-atomic.h" #include "mimalloc-track.h" +#include "prim/prim.h" // mi_prim_get_default_heap #include // memset, memcpy @@ -168,7 +169,7 @@ void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept { } void mi_collect(bool force) mi_attr_noexcept { - mi_heap_collect(mi_get_default_heap(), force); + mi_heap_collect(mi_prim_get_default_heap(), force); } @@ -178,9 +179,14 @@ void mi_collect(bool force) mi_attr_noexcept { mi_heap_t* mi_heap_get_default(void) { mi_thread_init(); - return mi_get_default_heap(); + return mi_prim_get_default_heap(); } +static bool mi_heap_is_default(const mi_heap_t* heap) { + return (heap == mi_prim_get_default_heap()); +} + + mi_heap_t* mi_heap_get_backing(void) { mi_heap_t* heap = mi_heap_get_default(); mi_assert_internal(heap!=NULL); @@ -418,7 +424,7 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { mi_assert(mi_heap_is_initialized(heap)); if (heap==NULL || !mi_heap_is_initialized(heap)) return NULL; mi_assert_expensive(mi_heap_is_valid(heap)); - mi_heap_t* old = mi_get_default_heap(); + mi_heap_t* old = mi_prim_get_default_heap(); _mi_heap_set_default_direct(heap); return old; } @@ -468,7 +474,7 @@ bool mi_heap_check_owned(mi_heap_t* heap, const void* p) { } bool mi_check_owned(const void* p) { - return mi_heap_check_owned(mi_get_default_heap(), p); + return mi_heap_check_owned(mi_prim_get_default_heap(), p); } /* ----------------------------------------------------------- diff --git a/src/init.c b/src/init.c index dd555030..2d8f0df2 100644 --- a/src/init.c +++ b/src/init.c @@ -239,13 +239,13 @@ static void mi_thread_data_collect(void) { // Initialize the thread local default heap, called from `mi_thread_init` static bool _mi_heap_init(void) { - if (mi_heap_is_initialized(mi_get_default_heap())) return true; + if (mi_heap_is_initialized(mi_prim_get_default_heap())) return true; if (_mi_is_main_thread()) { // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization // the main heap is statically allocated mi_heap_main_init(); _mi_heap_set_default_direct(&_mi_heap_main); - //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); + //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap()); } else { // use `_mi_os_alloc` to allocate directly from the OS @@ -418,7 +418,7 @@ void mi_thread_init(void) mi_attr_noexcept } void mi_thread_done(void) mi_attr_noexcept { - _mi_thread_done(mi_get_default_heap()); + _mi_thread_done(mi_prim_get_default_heap()); } static void _mi_thread_done(mi_heap_t* heap) { diff --git a/src/os.c 
b/src/os.c index 8ef72e04..8fb7b781 100644 --- a/src/os.c +++ b/src/os.c @@ -120,7 +120,7 @@ void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize uintptr_t init = MI_HINT_BASE; #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode - uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap()); init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif uintptr_t expected = hint + size; @@ -483,7 +483,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { // Initialize the start address after the 32TiB area start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap()); start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB #endif } diff --git a/src/page.c b/src/page.c index 46dfd26f..1347c845 100644 --- a/src/page.c +++ b/src/page.c @@ -103,6 +103,8 @@ static bool mi_page_is_valid_init(mi_page_t* page) { return true; } +extern bool _mi_process_is_initialized; // has mi_process_init been called? + bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); #if MI_SECURE @@ -873,8 +875,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // initialize if necessary if mi_unlikely(!mi_heap_is_initialized(heap)) { - mi_thread_init(); // calls `_mi_heap_init` in turn - heap = mi_get_default_heap(); + heap = mi_heap_get_default(); // calls mi_thread_init if mi_unlikely(!mi_heap_is_initialized(heap)) { return NULL; } } mi_assert_internal(mi_heap_is_initialized(heap)); diff --git a/src/prim/prim.h b/src/prim/prim.h index 2d951930..272fd72f 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -104,7 +104,7 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ ) -static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { +static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { void* res; const size_t ofs = (slot*sizeof(void*)); #if defined(__i386__) @@ -132,7 +132,7 @@ static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { } // setting a tls slot is only used on macOS for now -static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { +static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { const size_t ofs = (slot*sizeof(void*)); #if defined(__i386__) __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS @@ -161,12 +161,12 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { #if defined(__BIONIC__) // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86 - return (uintptr_t)mi_tls_slot(1); + return (uintptr_t)mi_prim_tls_slot(1); #else // in all our other targets, slot 0 is the thread id // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h // 
apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36 - return (uintptr_t)mi_tls_slot(0); + return (uintptr_t)mi_prim_tls_slot(0); #endif } @@ -180,4 +180,95 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { #endif + +/* ---------------------------------------------------------------------------------------- +The thread local default heap: `_mi_prim_get_default_heap()` +This is inlined here as it is on the fast path for allocation functions. + +On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a +__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures +that the storage will always be available (allocated on the thread stacks). +On some platforms though we cannot use that when overriding `malloc` since the underlying +TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. +We try to circumvent this in an efficient way: +- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the + loader itself calls `malloc` even before the modules are initialized. +- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). +- DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323) +------------------------------------------------------------------------------------------- */ + +// defined in `init.c`; do not use these directly +extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern bool _mi_process_is_initialized; // has mi_process_init been called? + +static inline mi_heap_t* mi_prim_get_default_heap(void); + +#if defined(MI_MALLOC_OVERRIDE) +#if defined(__APPLE__) // macOS + #define MI_TLS_SLOT 89 // seems unused? + // #define MI_TLS_RECURSE_GUARD 1 + // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) + // see +#elif defined(__OpenBSD__) + // use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) + // see + #define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) + // #elif defined(__DragonFly__) + // #warning "mimalloc is not working correctly on DragonFly yet." + // #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) +#elif defined(__ANDROID__) + // See issue #381 + #define MI_TLS_PTHREAD +#endif +#endif + + +#if defined(MI_TLS_SLOT) + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT); + if mi_unlikely(heap == NULL) { + #ifdef __GNUC__ + __asm(""); // prevent conditional load of the address of _mi_heap_empty + #endif + heap = (mi_heap_t*)&_mi_heap_empty; + } + return heap; +} + +#elif defined(MI_TLS_PTHREAD_SLOT_OFS) + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t* heap; + pthread_t self = pthread_self(); + #if defined(__DragonFly__) + if (self==NULL) { heap = _mi_heap_main_get(); } else + #endif + { + heap = *((mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS)); + } + return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); +} + +#elif defined(MI_TLS_PTHREAD) + +extern pthread_key_t _mi_heap_default_key; +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? 
_mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); + return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); +} + +#else // default using a thread local variable; used on most platforms. + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + #if defined(MI_TLS_RECURSE_GUARD) + if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); + #endif + return _mi_heap_default; +} + +#endif // mi_prim_get_default_heap() + + + #endif // MIMALLOC_PRIM_H diff --git a/src/random.c b/src/random.c index 06d4ba4a..ad14bf1f 100644 --- a/src/random.c +++ b/src/random.c @@ -168,6 +168,8 @@ If we cannot get good randomness, we fall back to weak randomness based on a tim #if defined(_WIN32) +#include + #if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) // We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using // dynamic overriding, we observed it can raise an exception when compiled with C++, and From 973268bf1ed92713b413defd5de27d7d3309fd7f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 15 Mar 2023 12:40:18 -0700 Subject: [PATCH 13/42] move random initialization to primitives --- src/prim/prim-unix.c | 90 +++++++++++++++++++++- src/prim/prim-wasi.c | 11 ++- src/prim/prim-windows.c | 42 +++++++++++ src/prim/prim.h | 4 + src/random.c | 162 ++-------------------------------------- 5 files changed, 150 insertions(+), 159 deletions(-) diff --git a/src/prim/prim-unix.c b/src/prim/prim-unix.c index ce24c24a..6b593fd3 100644 --- a/src/prim/prim-unix.c +++ b/src/prim/prim-unix.c @@ -6,7 +6,7 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #ifndef _DEFAULT_SOURCE -#define _DEFAULT_SOURCE // ensure mmap flags are defined +#define _DEFAULT_SOURCE // ensure mmap flags and syscall are defined #endif #if defined(__sun) @@ -661,3 +661,91 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { return true; } #endif // !MI_USE_ENVIRON + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +#if defined(__APPLE__) + +#include +#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 +#include +#include +#endif +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 + // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf + // may fail silently on macOS. See PR #390, and + return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); + #else + // fall back on older macOS + arc4random_buf(buf, buf_len); + return true; + #endif +} + +#elif defined(__ANDROID__) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__sun) + +#include +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + arc4random_buf(buf, buf_len); + return true; +} + +#elif defined(__linux__) || defined(__HAIKU__) + +#if defined(__linux__) +#include +#endif +#include +#include +#include +#include +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` + // and for the latter the actual `getrandom` call is not always defined. 
+ // (see ) + // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. + #ifdef SYS_getrandom + #ifndef GRND_NONBLOCK + #define GRND_NONBLOCK (1) + #endif + static _Atomic(uintptr_t) no_getrandom; // = 0 + if (mi_atomic_load_acquire(&no_getrandom)==0) { + ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); + if (ret >= 0) return (buf_len == (size_t)ret); + if (errno != ENOSYS) return false; + mi_atomic_store_release(&no_getrandom, 1UL); // don't call again, and fall back to /dev/urandom + } + #endif + int flags = O_RDONLY; + #if defined(O_CLOEXEC) + flags |= O_CLOEXEC; + #endif + int fd = open("/dev/urandom", flags, 0); + if (fd < 0) return false; + size_t count = 0; + while(count < buf_len) { + ssize_t ret = read(fd, (char*)buf + count, buf_len - count); + if (ret<=0) { + if (errno!=EAGAIN && errno!=EINTR) break; + } + else { + count += ret; + } + } + close(fd); + return (count==buf_len); +} + +#else + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return false; +} + +#endif \ No newline at end of file diff --git a/src/prim/prim-wasi.c b/src/prim/prim-wasi.c index c37b0847..38efece0 100644 --- a/src/prim/prim-wasi.c +++ b/src/prim/prim-wasi.c @@ -233,4 +233,13 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false; _mi_strlcpy(result, s, result_size); return true; -} \ No newline at end of file +} + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return false; +} diff --git a/src/prim/prim-windows.c b/src/prim/prim-windows.c index f6f1a9db..3b791de7 100644 --- a/src/prim/prim-windows.c +++ b/src/prim/prim-windows.c @@ -506,3 +506,45 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); return (len > 0 && len < result_size); } + + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) +// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using +// dynamic overriding, we observed it can raise an exception when compiled with C++, and +// sometimes deadlocks when also running under the VS debugger. +// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. +// To be continued.. 
+#pragma comment (lib,"advapi32.lib") +#define RtlGenRandom SystemFunction036 +mi_decl_externc BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return (RtlGenRandom(buf, (ULONG)buf_len) != 0); +} + +#else + +#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG +#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002 +#endif + +typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG); +static PBCryptGenRandom pBCryptGenRandom = NULL; + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + if (pBCryptGenRandom == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll")); + if (hDll != NULL) { + pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom"); + } + if (pBCryptGenRandom == NULL) return false; + } + return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); +} + +#endif // MI_USE_RTLGENRANDOM diff --git a/src/prim/prim.h b/src/prim/prim.h index 272fd72f..c067046d 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -72,6 +72,10 @@ void _mi_prim_out_stderr( const char* msg ); bool _mi_prim_getenv(const char* name, char* result, size_t result_size); +// Fill a buffer with strong randomness; return `false` on error or if +// there is no strong randomization available. +bool _mi_prim_random_buf(void* buf, size_t buf_len); + //------------------------------------------------------------------- // Thread id // diff --git a/src/random.c b/src/random.c index ad14bf1f..3c8372c8 100644 --- a/src/random.c +++ b/src/random.c @@ -4,14 +4,10 @@ This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ -#ifndef _DEFAULT_SOURCE -#define _DEFAULT_SOURCE // for syscall() on Linux -#endif - #include "mimalloc.h" #include "mimalloc-internal.h" - -#include // memset +#include "prim/prim.h" // _mi_prim_random_buf +#include // memset /* ---------------------------------------------------------------------------- We use our own PRNG to keep predictable performance of random number generation @@ -158,161 +154,13 @@ uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { /* ---------------------------------------------------------------------------- -To initialize a fresh random context we rely on the OS: -- Windows : BCryptGenRandom (or RtlGenRandom) -- macOS : CCRandomGenerateBytes, arc4random_buf -- bsd,wasi : arc4random_buf -- Linux : getrandom,/dev/urandom +To initialize a fresh random context. If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. -----------------------------------------------------------------------------*/ -#if defined(_WIN32) - -#include - -#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) -// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using -// dynamic overriding, we observed it can raise an exception when compiled with C++, and -// sometimes deadlocks when also running under the VS debugger. -// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. -// To be continued.. 
-#pragma comment (lib,"advapi32.lib") -#define RtlGenRandom SystemFunction036 -#ifdef __cplusplus -extern "C" { -#endif -BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); -#ifdef __cplusplus -} -#endif -static bool os_random_buf(void* buf, size_t buf_len) { - return (RtlGenRandom(buf, (ULONG)buf_len) != 0); -} -#else - -#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG -#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002 -#endif - -typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG); -static PBCryptGenRandom pBCryptGenRandom = NULL; - -static bool os_random_buf(void* buf, size_t buf_len) { - if (pBCryptGenRandom == NULL) { - HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll")); - if (hDll != NULL) { - pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom"); - } - } - if (pBCryptGenRandom == NULL) { - return false; - } - else { - return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); - } -} -#endif - -#elif defined(__APPLE__) -#include -#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 -#include -#include -#endif -static bool os_random_buf(void* buf, size_t buf_len) { - #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 - // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf - // may fail silently on macOS. See PR #390, and - return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); - #else - // fall back on older macOS - arc4random_buf(buf, buf_len); - return true; - #endif -} - -#elif defined(__ANDROID__) || defined(__DragonFly__) || \ - defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__sun) // todo: what to use with __wasi__? -#include -static bool os_random_buf(void* buf, size_t buf_len) { - arc4random_buf(buf, buf_len); - return true; -} -#elif defined(__linux__) || defined(__HAIKU__) -#if defined(__linux__) -#include -#endif -#include -#include -#include -#include -#include -static bool os_random_buf(void* buf, size_t buf_len) { - // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` - // and for the latter the actual `getrandom` call is not always defined. - // (see ) - // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. 
-#ifdef SYS_getrandom - #ifndef GRND_NONBLOCK - #define GRND_NONBLOCK (1) - #endif - static _Atomic(uintptr_t) no_getrandom; // = 0 - if (mi_atomic_load_acquire(&no_getrandom)==0) { - ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); - if (ret >= 0) return (buf_len == (size_t)ret); - if (errno != ENOSYS) return false; - mi_atomic_store_release(&no_getrandom, 1UL); // don't call again, and fall back to /dev/urandom - } -#endif - int flags = O_RDONLY; - #if defined(O_CLOEXEC) - flags |= O_CLOEXEC; - #endif - int fd = open("/dev/urandom", flags, 0); - if (fd < 0) return false; - size_t count = 0; - while(count < buf_len) { - ssize_t ret = read(fd, (char*)buf + count, buf_len - count); - if (ret<=0) { - if (errno!=EAGAIN && errno!=EINTR) break; - } - else { - count += ret; - } - } - close(fd); - return (count==buf_len); -} -#else -static bool os_random_buf(void* buf, size_t buf_len) { - return false; -} -#endif - -#if defined(_WIN32) -#include -#elif defined(__APPLE__) -#include -#else -#include -#endif - uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random - - #if defined(_WIN32) - LARGE_INTEGER pcount; - QueryPerformanceCounter(&pcount); - x ^= (uintptr_t)(pcount.QuadPart); - #elif defined(__APPLE__) - x ^= (uintptr_t)mach_absolute_time(); - #else - struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - x ^= (uintptr_t)time.tv_sec; - x ^= (uintptr_t)time.tv_nsec; - #endif + x ^= _mi_prim_clock_now(); // and do a few randomization steps uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; for (uintptr_t i = 0; i < max; i++) { @@ -324,7 +172,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) { uint8_t key[32]; - if (use_weak || !os_random_buf(key, sizeof(key))) { + if (use_weak || !_mi_prim_random_buf(key, sizeof(key))) { // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time #if !defined(__wasi__) From 9a2dbf373e2f8665c88950cf65bc11469d9fa94d Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 15 Mar 2023 13:35:23 -0700 Subject: [PATCH 14/42] move thread init to primitives --- include/mimalloc-internal.h | 3 +- src/init.c | 78 ++++++++----------------------------- src/prim/prim-unix.c | 48 +++++++++++++++++++++++ src/prim/prim-wasi.c | 17 ++++++++ src/prim/prim-windows.c | 56 ++++++++++++++++++++++++++ src/prim/prim.h | 10 +++++ 6 files changed, 149 insertions(+), 63 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 5843f60f..36de1342 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -75,7 +75,8 @@ bool _mi_is_main_thread(void); size_t _mi_current_thread_count(void); bool _mi_preloading(void); // true while the C runtime is not ready mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; -mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap +mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap +void _mi_thread_done(mi_heap_t* heap); // os.c size_t _mi_os_page_size(void); diff --git a/src/init.c b/src/init.c index 2d8f0df2..31ec87cb 100644 --- a/src/init.c +++ b/src/init.c @@ -339,54 +339,12 @@ static bool _mi_heap_done(mi_heap_t* heap) { // to set up the thread local keys. 
// -------------------------------------------------------- -static void _mi_thread_done(mi_heap_t* default_heap); - -#if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain -#elif defined(_WIN32) && !defined(MI_SHARED_LIB) - // use thread local storage keys to detect thread ending - #include - #include - #if (_WIN32_WINNT < 0x600) // before Windows Vista - WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); - WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); - WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); - WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); - #endif - static DWORD mi_fls_key = (DWORD)(-1); - static void NTAPI mi_fls_done(PVOID value) { - mi_heap_t* heap = (mi_heap_t*)value; - if (heap != NULL) { - _mi_thread_done(heap); - FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672 - } - } -#elif defined(MI_USE_PTHREADS) - // use pthread local storage keys to detect thread ending - // (and used with MI_TLS_PTHREADS for the default heap) - pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); - static void mi_pthread_done(void* value) { - if (value!=NULL) _mi_thread_done((mi_heap_t*)value); - } -#elif defined(__wasi__) -// no pthreads in the WebAssembly Standard Interface -#else - #pragma message("define a way to call mi_thread_done when a thread is done") -#endif - // Set up handlers so `mi_thread_done` is called automatically static void mi_process_setup_auto_thread_done(void) { static bool tls_initialized = false; // fine if it races if (tls_initialized) return; tls_initialized = true; - #if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain - #elif defined(_WIN32) && !defined(MI_SHARED_LIB) - mi_fls_key = FlsAlloc(&mi_fls_done); - #elif defined(MI_USE_PTHREADS) - mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); - pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); - #endif + _mi_prim_thread_init_auto_done(); _mi_heap_set_default_direct(&_mi_heap_main); } @@ -418,13 +376,19 @@ void mi_thread_init(void) mi_attr_noexcept } void mi_thread_done(void) mi_attr_noexcept { - _mi_thread_done(mi_prim_get_default_heap()); + _mi_thread_done(NULL); } -static void _mi_thread_done(mi_heap_t* heap) { +void _mi_thread_done(mi_heap_t* heap) +{ mi_atomic_decrement_relaxed(&thread_count); _mi_stat_decrease(&_mi_stats_main.threads, 1); + if (heap == NULL) { + heap = mi_prim_get_default_heap(); + if (heap == NULL) return; + } + // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... if (heap->thread_id != _mi_thread_id()) return; @@ -446,16 +410,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { // ensure the default heap is passed to `_mi_thread_done` // setting to a non-NULL value also ensures `mi_thread_done` is called. 
- #if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain - #elif defined(_WIN32) && !defined(MI_SHARED_LIB) - mi_assert_internal(mi_fls_key != 0); - FlsSetValue(mi_fls_key, heap); - #elif defined(MI_USE_PTHREADS) - if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD - pthread_setspecific(_mi_heap_default_key, heap); - } - #endif + _mi_prim_thread_associate_default_heap(heap); } @@ -570,11 +525,11 @@ void mi_process_init(void) mi_attr_noexcept { _mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL); mi_thread_init(); - #if defined(_WIN32) && !defined(MI_SHARED_LIB) - // When building as a static lib the FLS cleanup happens to early for the main thread. + #if defined(_WIN32) + // On windows, when building as a static lib the FLS cleanup happens to early for the main thread. // To avoid this, set the FLS value for the main thread to NULL so the fls cleanup // will not call _mi_thread_done on the (still executing) main thread. See issue #508. - FlsSetValue(mi_fls_key, NULL); + _mi_prim_thread_associate_default_heap(NULL); #endif mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) @@ -605,10 +560,9 @@ static void mi_cdecl mi_process_done(void) { if (process_done) return; process_done = true; - #if defined(_WIN32) && !defined(MI_SHARED_LIB) - FlsFree(mi_fls_key); // call thread-done on all threads (except the main thread) to prevent dangling callback pointer if statically linked with a DLL; Issue #208 - #endif - + // release any thread specific resources and ensure _mi_thread_done is called on all but the main thread + _mi_prim_thread_done_auto_done(); + #ifndef MI_SKIP_COLLECT_ON_EXIT #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB) // free all memory if possible on process exit. 
This is not needed for a stand-alone process diff --git a/src/prim/prim-unix.c b/src/prim/prim-unix.c index 6b593fd3..9270e088 100644 --- a/src/prim/prim-unix.c +++ b/src/prim/prim-unix.c @@ -748,4 +748,52 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { return false; } +#endif + + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +#if defined(MI_USE_PTHREADS) + +// use pthread local storage keys to detect thread ending +// (and used with MI_TLS_PTHREADS for the default heap) +pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); + +static void mi_pthread_done(void* value) { + if (value!=NULL) { + _mi_thread_done((mi_heap_t*)value); + } +} + +void _mi_prim_thread_init_auto_done(void) { + mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); + pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing to do +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD + pthread_setspecific(_mi_heap_default_key, heap); + } +} + +#else + +void _mi_prim_thread_init_auto_done(void) { + // nothing +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); +} + #endif \ No newline at end of file diff --git a/src/prim/prim-wasi.c b/src/prim/prim-wasi.c index 38efece0..f4d3be58 100644 --- a/src/prim/prim-wasi.c +++ b/src/prim/prim-wasi.c @@ -243,3 +243,20 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { bool _mi_prim_random_buf(void* buf, size_t buf_len) { return false; } + + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +void _mi_prim_thread_init_auto_done(void) { + // nothing +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); +} diff --git a/src/prim/prim-windows.c b/src/prim/prim-windows.c index 3b791de7..008b9fa4 100644 --- a/src/prim/prim-windows.c +++ b/src/prim/prim-windows.c @@ -548,3 +548,59 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { } #endif // MI_USE_RTLGENRANDOM + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +#if !defined(MI_SHARED_LIB) + +// use thread local storage keys to detect thread ending +#include +#if (_WIN32_WINNT < 0x600) // before Windows Vista +WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); +WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); +WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); +WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); +#endif + +static DWORD mi_fls_key = (DWORD)(-1); + +static void NTAPI mi_fls_done(PVOID value) { + mi_heap_t* heap = (mi_heap_t*)value; + if (heap != NULL) { + _mi_thread_done(heap); + FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672 + } +} + +void _mi_prim_thread_init_auto_done(void) { + mi_fls_key = FlsAlloc(&mi_fls_done); +} + +void _mi_prim_thread_done_auto_done(void) { + // call thread-done on all threads (except the main 
thread) to prevent + // dangling callback pointer if statically linked with a DLL; Issue #208 + FlsFree(mi_fls_key); +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + mi_assert_internal(mi_fls_key != (DWORD)(-1)); + FlsSetValue(mi_fls_key, heap); +} + +#else + +// Dll; nothing to do as in that case thread_done is handled through the DLL_THREAD_DETACH event. + +void _mi_prim_thread_init_auto_done(void) { +} + +void _mi_prim_thread_done_auto_done(void) { +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); +} + +#endif diff --git a/src/prim/prim.h b/src/prim/prim.h index c067046d..5ed0f2e0 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -76,6 +76,16 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size); // there is no strong randomization available. bool _mi_prim_random_buf(void* buf, size_t buf_len); +// Called on the first thread start, and should ensure `_mi_thread_done` is called on thread termination. +void _mi_prim_thread_init_auto_done(void); + +// Called on process exit and may take action to clean up resources associated with the thread auto done. +void _mi_prim_thread_done_auto_done(void); + +// Called when the default heap for a thread changes +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); + + //------------------------------------------------------------------- // Thread id // From 84ef963a479eba52a016ae9eed938f450e77094f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 15 Mar 2023 14:43:35 -0700 Subject: [PATCH 15/42] remove conioinclude --- src/options.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/options.c b/src/options.c index 4c76ae41..f0c52c84 100644 --- a/src/options.c +++ b/src/options.c @@ -23,9 +23,6 @@ int mi_version(void) mi_attr_noexcept { return MI_MALLOC_VERSION; } -#ifdef _WIN32 -#include -#endif // -------------------------------------------------------- // Options From 479ef4bf4c8cdfad0f5be274275337a813a23012 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 15 Mar 2023 19:07:35 -0700 Subject: [PATCH 16/42] fix precise free size in aligned allocation --- src/os.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/os.c b/src/os.c index 8fb7b781..a91bbb91 100644 --- a/src/os.c +++ b/src/os.c @@ -142,17 +142,22 @@ void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { Free memory -------------------------------------------------------------- */ -void _mi_os_free_ex(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) -{ +static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; + mi_assert_internal((size % _mi_os_page_size()) == 0); if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) - const size_t csize = _mi_os_good_alloc_size(size); - _mi_prim_free(addr, csize); + _mi_prim_free(addr, size); + mi_stats_t* stats = &_mi_stats_main; if (was_committed) { _mi_stat_decrease(&stats->committed, size); } _mi_stat_decrease(&stats->reserved, size); } + +void _mi_os_free_ex(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) { + const size_t csize = _mi_os_good_alloc_size(size); + mi_os_mem_free(addr,csize,was_committed,tld_stats); +} + void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) { _mi_os_free_ex(p, size, true, tld_stats); } @@ -205,7 +210,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // if not 
aligned, free it, overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { - _mi_os_free_ex(p, size, commit, stats); + mi_os_mem_free(p, size, commit, stats); _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (%zu bytes, address: %p, alignment: %zu, commit: %d)\n", size, p, alignment, commit); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; @@ -235,8 +240,8 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t mid_size = _mi_align_up(size, _mi_os_page_size()); size_t post_size = over_size - pre_size - mid_size; mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size); - if (pre_size > 0) _mi_os_free_ex(p, pre_size, commit, stats); - if (post_size > 0) _mi_os_free_ex((uint8_t*)aligned_p + mid_size, post_size, commit, stats); + if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); + if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); // we can return the aligned pointer on `mmap` (and sbrk) systems p = aligned_p; } From cfe3d04299be2436d515f9c448db3e461f40228a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 15 Mar 2023 19:15:53 -0700 Subject: [PATCH 17/42] cleanup --- src/bitmap.h | 2 +- src/prim/prim.h | 14 +++++++------- src/static.c | 21 +++++++++++---------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/bitmap.h b/src/bitmap.h index 39ca55b2..435c5f0a 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2020 Microsoft Research, Daan Leijen +Copyright (c) 2019-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. diff --git a/src/prim/prim.h b/src/prim/prim.h index 5ed0f2e0..5a58a79f 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -22,10 +22,10 @@ typedef struct mi_os_mem_config_s { } mi_os_mem_config_t; // Initialize -void _mi_prim_mem_init( mi_os_mem_config_t* config ); +void _mi_prim_mem_init( mi_os_mem_config_t* config ); // Free OS memory -void _mi_prim_free(void* addr, size_t size ); +void _mi_prim_free(void* addr, size_t size ); // Allocate OS memory. Return NULL on error. // The `try_alignment` is just a hint and the returned pointer does not have to be aligned. @@ -34,14 +34,14 @@ void _mi_prim_free(void* addr, size_t size ); void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large); // Commit memory. Returns error code or 0 on success. -int _mi_prim_commit(void* addr, size_t size, bool commit); +int _mi_prim_commit(void* addr, size_t size, bool commit); // Reset memory. The range keeps being accessible but the content might be reset. // Returns error code or 0 on success. -int _mi_prim_reset(void* addr, size_t size); +int _mi_prim_reset(void* addr, size_t size); // Protect memory. Returns error code or 0 on success. -int _mi_prim_protect(void* addr, size_t size, bool protect); +int _mi_prim_protect(void* addr, size_t size, bool protect); // Allocate huge (1GiB) pages possibly associated with a NUMA node. // pre: size > 0 and a multiple of 1GiB. 
@@ -59,13 +59,13 @@ size_t _mi_prim_numa_node_count(void); mi_msecs_t _mi_prim_clock_now(void); // Return process information (only for statistics) -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, +void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults); // Default stderr output. (only for warnings etc. with verbose enabled) // msg != NULL && _mi_strlen(msg) > 0 -void _mi_prim_out_stderr( const char* msg ); +void _mi_prim_out_stderr( const char* msg ); // Get an environment variable. (only for options) // name != NULL, result != NULL, result_size >= 64 diff --git a/src/static.c b/src/static.c index 4b3abc28..0de72fe3 100644 --- a/src/static.c +++ b/src/static.c @@ -20,20 +20,21 @@ terms of the MIT license. A copy of the license can be found in the file // containing the whole library. If it is linked first // it will override all the standard library allocation // functions (on Unix's). -#include "stats.c" -#include "random.c" -#include "os.c" -#include "bitmap.c" -#include "arena.c" -#include "region.c" -#include "segment.c" -#include "page.c" -#include "heap.c" #include "alloc.c" #include "alloc-aligned.c" -#include "alloc-posix.c" #if MI_OSX_ZONE #include "alloc-override-osx.c" #endif +#include "alloc-posix.c" +#include "arena.c" +#include "bitmap.c" +#include "heap.c" #include "init.c" #include "options.c" +#include "os.c" +#include "page.c" +#include "prim/prim.c" +#include "random.c" +#include "region.c" +#include "segment.c" +#include "stats.c" From 9fb4f2a501450beb394af85d476d9f412be0a680 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 15 Mar 2023 19:25:18 -0700 Subject: [PATCH 18/42] update vs2019 ide --- ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc-override.vcxproj.filters | 1 + ide/vs2019/mimalloc.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj.filters | 3 +++ 4 files changed, 6 insertions(+) diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 5fa59569..5fb809c8 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -236,6 +236,7 @@ + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index c06fd1de..737e0600 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -49,6 +49,7 @@ Source Files + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index c12955c3..9e372ca4 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -225,6 +225,7 @@ + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index 4cd0eb2e..5ab88914 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -52,6 +52,9 @@ Source Files + + Source Files + From 824fd8a7b100be3c3c6db0fe0b8591ad0e334719 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 15 Mar 2023 20:31:52 -0700 Subject: [PATCH 19/42] fix issue #707; rename a local template parameter (destroy) to work around two-phase template resolve in msvc 2019 --- ide/vs2017/mimalloc.vcxproj | 2 +- ide/vs2019/mimalloc.vcxproj | 2 +- ide/vs2022/mimalloc.vcxproj | 12 ++++++------ include/mimalloc.h | 10 +++++----- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index d29c2c7f..70bf4366 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ 
b/ide/vs2017/mimalloc.vcxproj @@ -129,7 +129,7 @@ true ../../include _CRT_SECURE_NO_WARNINGS;MI_DEBUG=3;%(PreprocessorDefinitions); - CompileAsC + CompileAsCpp false stdcpp14 diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index c12955c3..729a5c63 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -114,7 +114,7 @@ Level4 Disabled true - true + Default ../../include MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 9081881c..49eb5180 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -95,12 +95,12 @@ Level4 Disabled true - true + Default ../../include MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false - Default + stdcpp20 @@ -114,7 +114,7 @@ Level4 Disabled true - true + Default ../../include MI_DEBUG=4;%(PreprocessorDefinitions); CompileAsCpp @@ -141,7 +141,7 @@ Level4 MaxSpeed true - true + Default ../../include %(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode @@ -151,7 +151,7 @@ Default CompileAsCpp true - Default + stdcpp20 true @@ -169,7 +169,7 @@ Level4 MaxSpeed true - true + Default ../../include %(PreprocessorDefinitions);NDEBUG AssemblyAndSourceCode diff --git a/include/mimalloc.h b/include/mimalloc.h index 27f6b331..4fc7a752 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -470,13 +470,13 @@ template bool operator==(const mi_stl_allocator& , const template bool operator!=(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return false; } -#if (__cplusplus >= 201103L) || (_MSC_VER >= 1920) // C++11, at least vs2019 +#if (__cplusplus >= 201103L) || (_MSC_VER >= 1900) // C++11 #define MI_HAS_HEAP_STL_ALLOCATOR 1 #include // std::shared_ptr // Common base class for STL allocators in a specific heap -template struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common { +template struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common { using typename _mi_stl_allocator_common::size_type; using typename _mi_stl_allocator_common::value_type; using typename _mi_stl_allocator_common::pointer; @@ -495,7 +495,7 @@ template struct _mi_heap_stl_allocator_common : public _m #endif void collect(bool force) { mi_heap_collect(this->heap.get(), force); } - template bool is_equal(const _mi_heap_stl_allocator_common& x) const { return (this->heap == x.heap); } + template bool is_equal(const _mi_heap_stl_allocator_common& x) const { return (this->heap == x.heap); } protected: std::shared_ptr heap; @@ -503,10 +503,10 @@ protected: _mi_heap_stl_allocator_common() { mi_heap_t* hp = mi_heap_new(); - this->heap.reset(hp, (destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ + this->heap.reset(hp, (_mi_destroy ? 
&heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ } _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } - template _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } + template _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } private: static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } } From c4c96d2f8d9f62f6a5149be1f7aea2c457108ddf Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 15 Mar 2023 20:38:10 -0700 Subject: [PATCH 20/42] update older vs ide projects --- ide/vs2017/mimalloc-override.vcxproj | 1 + ide/vs2017/mimalloc-override.vcxproj.filters | 3 +++ ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 +++ include/mimalloc-internal.h | 1 + 5 files changed, 9 insertions(+) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index f3b7dd1e..f308225b 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -236,6 +236,7 @@ + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index e045ed8c..a67dddad 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -82,5 +82,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 70bf4366..e7be785c 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -233,6 +233,7 @@ + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 500292c5..27cd4b5e 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -62,6 +62,9 @@ Source Files + + Source Files + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 36de1342..33aafa70 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -699,6 +699,7 @@ static inline size_t mi_ctz(uintptr_t x) { #elif defined(_MSC_VER) #include // LONG_MAX +#include // BitScanReverse64 #define MI_HAVE_FAST_BITSCAN static inline size_t mi_clz(uintptr_t x) { if (x==0) return MI_INTPTR_BITS; From 7d834864bb59c31f8ced90d27530d51f5e05ae64 Mon Sep 17 00:00:00 2001 From: Daan Date: Thu, 16 Mar 2023 11:35:11 -0700 Subject: [PATCH 21/42] fix macOSX compilation --- src/init.c | 2 +- src/prim/prim-unix.c | 2 +- src/prim/prim.h | 18 +++++++++++------- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/init.c b/src/init.c index 31ec87cb..d73c1e1e 100644 --- a/src/init.c +++ b/src/init.c @@ -399,7 +399,7 @@ void _mi_thread_done(mi_heap_t* heap) void _mi_heap_set_default_direct(mi_heap_t* heap) { mi_assert_internal(heap != NULL); #if defined(MI_TLS_SLOT) - mi_tls_slot_set(MI_TLS_SLOT,heap); + mi_prim_tls_slot_set(MI_TLS_SLOT,heap); #elif defined(MI_TLS_PTHREAD_SLOT_OFS) *mi_tls_pthread_heap_slot() = heap; #elif defined(MI_TLS_PTHREAD) diff --git a/src/prim/prim-unix.c b/src/prim/prim-unix.c index 9270e088..997c0356 100644 --- a/src/prim/prim-unix.c +++ b/src/prim/prim-unix.c @@ -796,4 +796,4 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { MI_UNUSED(heap); } -#endif \ No newline at end of file +#endif diff --git a/src/prim/prim.h b/src/prim/prim.h index 5a58a79f..967c6698 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -252,16 +252,20 
@@ static inline mi_heap_t* mi_prim_get_default_heap(void) { #elif defined(MI_TLS_PTHREAD_SLOT_OFS) -static inline mi_heap_t* mi_prim_get_default_heap(void) { - mi_heap_t* heap; +static inline mi_heap_t** mi_prim_tls_pthread_heap_slot(void) { pthread_t self = pthread_self(); #if defined(__DragonFly__) - if (self==NULL) { heap = _mi_heap_main_get(); } else + if (self==NULL) return NULL; #endif - { - heap = *((mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS)); - } - return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); + return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); +} + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t** pheap = mi_prim_tls_pthread_heap_slot(); + if mi_unlikely(pheap == NULL) return _mi_heap_main_get(); + mi_heap_t* heap = *pheap; + if mi_unlikely(heap == NULL) return (mi_heap_t*)&_mi_heap_empty; + return heap; } #elif defined(MI_TLS_PTHREAD) From 134b23b9210795734b4cd3127616ca02e3461150 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 16 Mar 2023 17:42:00 -0700 Subject: [PATCH 22/42] fix asan/valgrind api fill test --- test/test-api-fill.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test-api-fill.c b/test/test-api-fill.c index a32dfa27..2ad06808 100644 --- a/test/test-api-fill.c +++ b/test/test-api-fill.c @@ -271,7 +271,7 @@ int main(void) { mi_free(p); }; - + #if !(MI_TRACK_VALGRIND || MI_TRACK_ASAN) CHECK_BODY("fill-freed-small") { size_t malloc_size = MI_SMALL_SIZE_MAX / 2; uint8_t* p = (uint8_t*)mi_malloc(malloc_size); @@ -286,6 +286,7 @@ int main(void) { // First sizeof(void*) bytes will contain housekeeping data, skip these result = check_debug_fill_freed(p + sizeof(void*), malloc_size - sizeof(void*)); }; + #endif #endif // --------------------------------------------------- @@ -309,7 +310,7 @@ bool check_zero_init(uint8_t* p, size_t size) { #if MI_DEBUG >= 2 bool check_debug_fill_uninit(uint8_t* p, size_t size) { -#if MI_TRACK_VALGRIND +#if MI_TRACK_VALGRIND || MI_TRACK_ASAN (void)p; (void)size; return true; // when compiled with valgrind we don't init on purpose #else From 8a1f6c82b238b62b9489ff252d97d3c419306a30 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 16 Mar 2023 17:47:00 -0700 Subject: [PATCH 23/42] move prim files in subdirectories --- src/prim/prim.c | 6 +++--- src/prim/{prim-unix.c => unix/prim.c} | 4 +++- src/prim/{prim-wasi.c => wasi/prim.c} | 4 +++- src/prim/{prim-windows.c => windows/prim.c} | 4 +++- 4 files changed, 12 insertions(+), 6 deletions(-) rename src/prim/{prim-unix.c => unix/prim.c} (99%) rename src/prim/{prim-wasi.c => wasi/prim.c} (99%) rename src/prim/{prim-windows.c => windows/prim.c} (99%) diff --git a/src/prim/prim.c b/src/prim/prim.c index 83b7abc1..eec13c48 100644 --- a/src/prim/prim.c +++ b/src/prim/prim.c @@ -9,10 +9,10 @@ terms of the MIT license. A copy of the license can be found in the file // depending on the OS. #if defined(_WIN32) -#include "prim-windows.c" // VirtualAlloc (Windows) +#include "windows/prim.c" // VirtualAlloc (Windows) #elif defined(__wasi__) #define MI_USE_SBRK -#include "prim-wasi.h" // memory-grow or sbrk (Wasm) +#include "wasi/prim.h" // memory-grow or sbrk (Wasm) #else -#include "prim-unix.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) +#include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) 
#endif diff --git a/src/prim/prim-unix.c b/src/prim/unix/prim.c similarity index 99% rename from src/prim/prim-unix.c rename to src/prim/unix/prim.c index 997c0356..d1cd4301 100644 --- a/src/prim/prim-unix.c +++ b/src/prim/unix/prim.c @@ -5,6 +5,8 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ +// This file is included in `src/prim/prim.c` + #ifndef _DEFAULT_SOURCE #define _DEFAULT_SOURCE // ensure mmap flags and syscall are defined #endif @@ -21,7 +23,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-atomic.h" -#include "prim.h" +#include "../prim.h" #include // mmap #include // sysconf diff --git a/src/prim/prim-wasi.c b/src/prim/wasi/prim.c similarity index 99% rename from src/prim/prim-wasi.c rename to src/prim/wasi/prim.c index f4d3be58..b8ac1a1b 100644 --- a/src/prim/prim-wasi.c +++ b/src/prim/wasi/prim.c @@ -5,10 +5,12 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ +// This file is included in `src/prim/prim.c` + #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-atomic.h" -#include "prim.h" +#include "../prim.h" //--------------------------------------------- // Initialize diff --git a/src/prim/prim-windows.c b/src/prim/windows/prim.c similarity index 99% rename from src/prim/prim-windows.c rename to src/prim/windows/prim.c index 008b9fa4..2fa445a1 100644 --- a/src/prim/prim-windows.c +++ b/src/prim/windows/prim.c @@ -5,10 +5,12 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ +// This file is included in `src/prim/prim.c` + #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-atomic.h" -#include "prim.h" +#include "../prim.h" #include // strerror #include // fputs, stderr From 072316bd33e84453b133fe1749a3b757040cb1f8 Mon Sep 17 00:00:00 2001 From: Xinglong He Date: Sat, 11 Mar 2023 17:30:21 -0800 Subject: [PATCH 24/42] add etw support --- ide/vs2022/mimalloc-override.vcxproj | 5 + ide/vs2022/mimalloc.vcxproj | 1 + include/mimalloc-etw-gen.h | 905 +++++++++++++++++++++++++++ include/mimalloc-etw-gen.man | Bin 0 -> 3950 bytes include/mimalloc-etw.h | 4 + include/mimalloc-track.h | 12 + include/mimalloc-types.h | 4 +- include/mimalloc.wprp | 56 ++ src/init.c | 5 + 9 files changed, 991 insertions(+), 1 deletion(-) create mode 100644 include/mimalloc-etw-gen.h create mode 100644 include/mimalloc-etw-gen.man create mode 100644 include/mimalloc-etw.h create mode 100644 include/mimalloc.wprp diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index a1d25d28..1eb72952 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -212,6 +212,8 @@ + + @@ -252,6 +254,9 @@ + + + diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 15c7e039..9e0ffc85 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -246,6 +246,7 @@ + diff --git a/include/mimalloc-etw-gen.h b/include/mimalloc-etw-gen.h new file mode 100644 index 00000000..4e0a092a --- /dev/null +++ b/include/mimalloc-etw-gen.h @@ -0,0 +1,905 @@ +//**********************************************************************` +//* This is an include file generated by Message Compiler. *` +//* *` +//* Copyright (c) Microsoft Corporation. All Rights Reserved. *` +//**********************************************************************` +#pragma once + +//***************************************************************************** +// +// Notes on the ETW event code generated by MC: +// +// - Structures and arrays of structures are treated as an opaque binary blob. +// The caller is responsible for packing the data for the structure into a +// single region of memory, with no padding between values. The macro will +// have an extra parameter for the length of the blob. +// - Arrays of nul-terminated strings must be packed by the caller into a +// single binary blob containing the correct number of strings, with a nul +// after each string. The size of the blob is specified in characters, and +// includes the final nul. +// - Arrays of SID are treated as a single binary blob. The caller is +// responsible for packing the SID values into a single region of memory with +// no padding. +// - The length attribute on the data element in the manifest is significant +// for values with intype win:UnicodeString, win:AnsiString, or win:Binary. +// The length attribute must be specified for win:Binary, and is optional for +// win:UnicodeString and win:AnsiString (if no length is given, the strings +// are assumed to be nul-terminated). For win:UnicodeString, the length is +// measured in characters, not bytes. +// - For an array of win:UnicodeString, win:AnsiString, or win:Binary, the +// length attribute applies to every value in the array, so every value in +// the array must have the same length. 
The values in the array are provided +// to the macro via a single pointer -- the caller is responsible for packing +// all of the values into a single region of memory with no padding between +// values. +// - Values of type win:CountedUnicodeString, win:CountedAnsiString, and +// win:CountedBinary can be generated and collected on Vista or later. +// However, they may not decode properly without the Windows 10 2018 Fall +// Update. +// - Arrays of type win:CountedUnicodeString, win:CountedAnsiString, and +// win:CountedBinary must be packed by the caller into a single region of +// memory. The format for each item is a UINT16 byte-count followed by that +// many bytes of data. When providing the array to the generated macro, you +// must provide the total size of the packed array data, including the UINT16 +// sizes for each item. In the case of win:CountedUnicodeString, the data +// size is specified in WCHAR (16-bit) units. In the case of +// win:CountedAnsiString and win:CountedBinary, the data size is specified in +// bytes. +// +//***************************************************************************** + +#include +#include +#include + +#ifndef ETW_INLINE + #ifdef _ETW_KM_ + // In kernel mode, save stack space by never inlining templates. + #define ETW_INLINE DECLSPEC_NOINLINE __inline + #else + // In user mode, save code size by inlining templates as appropriate. + #define ETW_INLINE __inline + #endif +#endif // ETW_INLINE + +#if defined(__cplusplus) +extern "C" { +#endif + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// MCGEN_USE_KERNEL_MODE_APIS macro: +// Controls whether the generated code uses kernel-mode or user-mode APIs. +// - Set to 0 to use Windows user-mode APIs such as EventRegister. +// - Set to 1 to use Windows kernel-mode APIs such as EtwRegister. +// Default is based on whether the _ETW_KM_ macro is defined (i.e. by wdm.h). +// Note that the APIs can also be overridden directly, e.g. by setting the +// MCGEN_EVENTWRITETRANSFER or MCGEN_EVENTREGISTER macros. +// +#ifndef MCGEN_USE_KERNEL_MODE_APIS + #ifdef _ETW_KM_ + #define MCGEN_USE_KERNEL_MODE_APIS 1 + #else + #define MCGEN_USE_KERNEL_MODE_APIS 0 + #endif +#endif // MCGEN_USE_KERNEL_MODE_APIS + +// +// MCGEN_HAVE_EVENTSETINFORMATION macro: +// Controls how McGenEventSetInformation uses the EventSetInformation API. +// - Set to 0 to disable the use of EventSetInformation +// (McGenEventSetInformation will always return an error). +// - Set to 1 to directly invoke MCGEN_EVENTSETINFORMATION. +// - Set to 2 to to locate EventSetInformation at runtime via GetProcAddress +// (user-mode) or MmGetSystemRoutineAddress (kernel-mode). +// Default is determined as follows: +// - If MCGEN_EVENTSETINFORMATION has been customized, set to 1 +// (i.e. use MCGEN_EVENTSETINFORMATION). +// - Else if the target OS version has EventSetInformation, set to 1 +// (i.e. use MCGEN_EVENTSETINFORMATION). +// - Else set to 2 (i.e. try to dynamically locate EventSetInformation). +// Note that an McGenEventSetInformation function will only be generated if one +// or more provider in a manifest has provider traits. +// +#ifndef MCGEN_HAVE_EVENTSETINFORMATION + #ifdef MCGEN_EVENTSETINFORMATION // if MCGEN_EVENTSETINFORMATION has been customized, + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). 
+ #elif MCGEN_USE_KERNEL_MODE_APIS // else if using kernel-mode APIs, + #if NTDDI_VERSION >= 0x06040000 // if target OS is Windows 10 or later, + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). + #else // else + #define MCGEN_HAVE_EVENTSETINFORMATION 2 // find "EtwSetInformation" via MmGetSystemRoutineAddress. + #endif // else (using user-mode APIs) + #else // if target OS and SDK is Windows 8 or later, + #if WINVER >= 0x0602 && defined(EVENT_FILTER_TYPE_SCHEMATIZED) + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). + #else // else + #define MCGEN_HAVE_EVENTSETINFORMATION 2 // find "EventSetInformation" via GetModuleHandleExW/GetProcAddress. + #endif + #endif +#endif // MCGEN_HAVE_EVENTSETINFORMATION + +// +// MCGEN Override Macros +// +// The following override macros may be defined before including this header +// to control the APIs used by this header: +// +// - MCGEN_EVENTREGISTER +// - MCGEN_EVENTUNREGISTER +// - MCGEN_EVENTSETINFORMATION +// - MCGEN_EVENTWRITETRANSFER +// +// If the the macro is undefined, the MC implementation will default to the +// corresponding ETW APIs. For example, if the MCGEN_EVENTREGISTER macro is +// undefined, the EventRegister[MyProviderName] macro will use EventRegister +// in user mode and will use EtwRegister in kernel mode. +// +// To prevent issues from conflicting definitions of these macros, the value +// of the override macro will be used as a suffix in certain internal function +// names. Because of this, the override macros must follow certain rules: +// +// - The macro must be defined before any MC-generated header is included and +// must not be undefined or redefined after any MC-generated header is +// included. Different translation units (i.e. different .c or .cpp files) +// may set the macros to different values, but within a translation unit +// (within a single .c or .cpp file), the macro must be set once and not +// changed. +// - The override must be an object-like macro, not a function-like macro +// (i.e. the override macro must not have a parameter list). +// - The override macro's value must be a simple identifier, i.e. must be +// something that starts with a letter or '_' and contains only letters, +// numbers, and '_' characters. +// - If the override macro's value is the name of a second object-like macro, +// the second object-like macro must follow the same rules. (The override +// macro's value can also be the name of a function-like macro, in which +// case the function-like macro does not need to follow the same rules.) +// +// For example, the following will cause compile errors: +// +// #define MCGEN_EVENTWRITETRANSFER MyNamespace::MyClass::MyFunction // Value has non-identifier characters (colon). +// #define MCGEN_EVENTWRITETRANSFER GetEventWriteFunctionPointer(7) // Value has non-identifier characters (parentheses). +// #define MCGEN_EVENTWRITETRANSFER(h,e,a,r,c,d) EventWrite(h,e,c,d) // Override is defined as a function-like macro. +// #define MY_OBJECT_LIKE_MACRO MyNamespace::MyClass::MyEventWriteFunction +// #define MCGEN_EVENTWRITETRANSFER MY_OBJECT_LIKE_MACRO // Evaluates to something with non-identifier characters (colon). +// +// The following would be ok: +// +// #define MCGEN_EVENTWRITETRANSFER MyEventWriteFunction1 // OK, suffix will be "MyEventWriteFunction1". +// #define MY_OBJECT_LIKE_MACRO MyEventWriteFunction2 +// #define MCGEN_EVENTWRITETRANSFER MY_OBJECT_LIKE_MACRO // OK, suffix will be "MyEventWriteFunction2". 
+// #define MY_FUNCTION_LIKE_MACRO(h,e,a,r,c,d) MyNamespace::MyClass::MyEventWriteFunction3(h,e,c,d) +// #define MCGEN_EVENTWRITETRANSFER MY_FUNCTION_LIKE_MACRO // OK, suffix will be "MY_FUNCTION_LIKE_MACRO". +// +#ifndef MCGEN_EVENTREGISTER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTREGISTER EtwRegister + #else + #define MCGEN_EVENTREGISTER EventRegister + #endif +#endif // MCGEN_EVENTREGISTER +#ifndef MCGEN_EVENTUNREGISTER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTUNREGISTER EtwUnregister + #else + #define MCGEN_EVENTUNREGISTER EventUnregister + #endif +#endif // MCGEN_EVENTUNREGISTER +#ifndef MCGEN_EVENTSETINFORMATION + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTSETINFORMATION EtwSetInformation + #else + #define MCGEN_EVENTSETINFORMATION EventSetInformation + #endif +#endif // MCGEN_EVENTSETINFORMATION +#ifndef MCGEN_EVENTWRITETRANSFER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTWRITETRANSFER EtwWriteTransfer + #else + #define MCGEN_EVENTWRITETRANSFER EventWriteTransfer + #endif +#endif // MCGEN_EVENTWRITETRANSFER + +// +// MCGEN_EVENT_ENABLED macro: +// Override to control how the EventWrite[EventName] macros determine whether +// an event is enabled. The default behavior is for EventWrite[EventName] to +// use the EventEnabled[EventName] macros. +// +#ifndef MCGEN_EVENT_ENABLED +#define MCGEN_EVENT_ENABLED(EventName) EventEnabled##EventName() +#endif + +// +// MCGEN_EVENT_ENABLED_FORCONTEXT macro: +// Override to control how the EventWrite[EventName]_ForContext macros +// determine whether an event is enabled. The default behavior is for +// EventWrite[EventName]_ForContext to use the +// EventEnabled[EventName]_ForContext macros. +// +#ifndef MCGEN_EVENT_ENABLED_FORCONTEXT +#define MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, EventName) EventEnabled##EventName##_ForContext(pContext) +#endif + +// +// MCGEN_ENABLE_CHECK macro: +// Determines whether the specified event would be considered as enabled +// based on the state of the specified context. Slightly faster than calling +// McGenEventEnabled directly. +// +#ifndef MCGEN_ENABLE_CHECK +#define MCGEN_ENABLE_CHECK(Context, Descriptor) (Context.IsEnabled && McGenEventEnabled(&Context, &Descriptor)) +#endif + +#if !defined(MCGEN_TRACE_CONTEXT_DEF) +#define MCGEN_TRACE_CONTEXT_DEF +// This structure is for use by MC-generated code and should not be used directly. +typedef struct _MCGEN_TRACE_CONTEXT +{ + TRACEHANDLE RegistrationHandle; + TRACEHANDLE Logger; // Used as pointer to provider traits. + ULONGLONG MatchAnyKeyword; + ULONGLONG MatchAllKeyword; + ULONG Flags; + ULONG IsEnabled; + UCHAR Level; + UCHAR Reserve; + USHORT EnableBitsCount; + PULONG EnableBitMask; + const ULONGLONG* EnableKeyWords; + const UCHAR* EnableLevel; +} MCGEN_TRACE_CONTEXT, *PMCGEN_TRACE_CONTEXT; +#endif // MCGEN_TRACE_CONTEXT_DEF + +#if !defined(MCGEN_LEVEL_KEYWORD_ENABLED_DEF) +#define MCGEN_LEVEL_KEYWORD_ENABLED_DEF +// +// Determines whether an event with a given Level and Keyword would be +// considered as enabled based on the state of the specified context. +// Note that you may want to use MCGEN_ENABLE_CHECK instead of calling this +// function directly. +// +FORCEINLINE +BOOLEAN +McGenLevelKeywordEnabled( + _In_ PMCGEN_TRACE_CONTEXT EnableInfo, + _In_ UCHAR Level, + _In_ ULONGLONG Keyword + ) +{ + // + // Check if the event Level is lower than the level at which + // the channel is enabled. + // If the event Level is 0 or the channel is enabled at level 0, + // all levels are enabled. 
+ // + + if ((Level <= EnableInfo->Level) || // This also covers the case of Level == 0. + (EnableInfo->Level == 0)) { + + // + // Check if Keyword is enabled + // + + if ((Keyword == (ULONGLONG)0) || + ((Keyword & EnableInfo->MatchAnyKeyword) && + ((Keyword & EnableInfo->MatchAllKeyword) == EnableInfo->MatchAllKeyword))) { + return TRUE; + } + } + + return FALSE; +} +#endif // MCGEN_LEVEL_KEYWORD_ENABLED_DEF + +#if !defined(MCGEN_EVENT_ENABLED_DEF) +#define MCGEN_EVENT_ENABLED_DEF +// +// Determines whether the specified event would be considered as enabled based +// on the state of the specified context. Note that you may want to use +// MCGEN_ENABLE_CHECK instead of calling this function directly. +// +FORCEINLINE +BOOLEAN +McGenEventEnabled( + _In_ PMCGEN_TRACE_CONTEXT EnableInfo, + _In_ PCEVENT_DESCRIPTOR EventDescriptor + ) +{ + return McGenLevelKeywordEnabled(EnableInfo, EventDescriptor->Level, EventDescriptor->Keyword); +} +#endif // MCGEN_EVENT_ENABLED_DEF + +#if !defined(MCGEN_CONTROL_CALLBACK) +#define MCGEN_CONTROL_CALLBACK + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +VOID +__stdcall +McGenControlCallbackV2( + _In_ LPCGUID SourceId, + _In_ ULONG ControlCode, + _In_ UCHAR Level, + _In_ ULONGLONG MatchAnyKeyword, + _In_ ULONGLONG MatchAllKeyword, + _In_opt_ PEVENT_FILTER_DESCRIPTOR FilterData, + _Inout_opt_ PVOID CallbackContext + ) +/*++ + +Routine Description: + + This is the notification callback for Windows Vista and later. + +Arguments: + + SourceId - The GUID that identifies the session that enabled the provider. + + ControlCode - The parameter indicates whether the provider + is being enabled or disabled. + + Level - The level at which the event is enabled. + + MatchAnyKeyword - The bitmask of keywords that the provider uses to + determine the category of events that it writes. + + MatchAllKeyword - This bitmask additionally restricts the category + of events that the provider writes. + + FilterData - The provider-defined data. + + CallbackContext - The context of the callback that is defined when the provider + called EtwRegister to register itself. 
+ +Remarks: + + ETW calls this function to notify provider of enable/disable + +--*/ +{ + PMCGEN_TRACE_CONTEXT Ctx = (PMCGEN_TRACE_CONTEXT)CallbackContext; + ULONG Ix; +#ifndef MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + UNREFERENCED_PARAMETER(SourceId); + UNREFERENCED_PARAMETER(FilterData); +#endif + + if (Ctx == NULL) { + return; + } + + switch (ControlCode) { + + case EVENT_CONTROL_CODE_ENABLE_PROVIDER: + Ctx->Level = Level; + Ctx->MatchAnyKeyword = MatchAnyKeyword; + Ctx->MatchAllKeyword = MatchAllKeyword; + Ctx->IsEnabled = EVENT_CONTROL_CODE_ENABLE_PROVIDER; + + for (Ix = 0; Ix < Ctx->EnableBitsCount; Ix += 1) { + if (McGenLevelKeywordEnabled(Ctx, Ctx->EnableLevel[Ix], Ctx->EnableKeyWords[Ix]) != FALSE) { + Ctx->EnableBitMask[Ix >> 5] |= (1 << (Ix % 32)); + } else { + Ctx->EnableBitMask[Ix >> 5] &= ~(1 << (Ix % 32)); + } + } + break; + + case EVENT_CONTROL_CODE_DISABLE_PROVIDER: + Ctx->IsEnabled = EVENT_CONTROL_CODE_DISABLE_PROVIDER; + Ctx->Level = 0; + Ctx->MatchAnyKeyword = 0; + Ctx->MatchAllKeyword = 0; + if (Ctx->EnableBitsCount > 0) { +#pragma warning(suppress: 26451) // Arithmetic overflow cannot occur, no matter the value of EnableBitCount + RtlZeroMemory(Ctx->EnableBitMask, (((Ctx->EnableBitsCount - 1) / 32) + 1) * sizeof(ULONG)); + } + break; + + default: + break; + } + +#ifdef MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + // + // Call user defined callback + // + MCGEN_PRIVATE_ENABLE_CALLBACK_V2( + SourceId, + ControlCode, + Level, + MatchAnyKeyword, + MatchAllKeyword, + FilterData, + CallbackContext + ); +#endif // MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + + return; +} + +#endif // MCGEN_CONTROL_CALLBACK + +#ifndef _mcgen_PENABLECALLBACK + #if MCGEN_USE_KERNEL_MODE_APIS + #define _mcgen_PENABLECALLBACK PETWENABLECALLBACK + #else + #define _mcgen_PENABLECALLBACK PENABLECALLBACK + #endif +#endif // _mcgen_PENABLECALLBACK + +#if !defined(_mcgen_PASTE2) +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_PASTE2(a, b) _mcgen_PASTE2_imp(a, b) +#define _mcgen_PASTE2_imp(a, b) a##b +#endif // _mcgen_PASTE2 + +#if !defined(_mcgen_PASTE3) +// This macro is for use by MC-generated code and should not be used directly. 
+#define _mcgen_PASTE3(a, b, c) _mcgen_PASTE3_imp(a, b, c) +#define _mcgen_PASTE3_imp(a, b, c) a##b##_##c +#endif // _mcgen_PASTE3 + +// +// Macro validation +// + +// Validate MCGEN_EVENTREGISTER: + +// Trigger an error if MCGEN_EVENTREGISTER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTREGISTER); + +// Trigger an error if MCGEN_EVENTREGISTER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTREGISTER) + MCGEN_EVENTREGISTER_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTREGISTER is defined as a function-like macro: +typedef void MCGEN_EVENTREGISTER_must_not_be_a_functionLike_macro_MCGEN_EVENTREGISTER; +typedef int _mcgen_PASTE2(MCGEN_EVENTREGISTER_must_not_be_a_functionLike_macro_, MCGEN_EVENTREGISTER); + +// Validate MCGEN_EVENTUNREGISTER: + +// Trigger an error if MCGEN_EVENTUNREGISTER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTUNREGISTER); + +// Trigger an error if MCGEN_EVENTUNREGISTER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTUNREGISTER) + MCGEN_EVENTUNREGISTER_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTUNREGISTER is defined as a function-like macro: +typedef void MCGEN_EVENTUNREGISTER_must_not_be_a_functionLike_macro_MCGEN_EVENTUNREGISTER; +typedef int _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_must_not_be_a_functionLike_macro_, MCGEN_EVENTUNREGISTER); + +// Validate MCGEN_EVENTSETINFORMATION: + +// Trigger an error if MCGEN_EVENTSETINFORMATION is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTSETINFORMATION); + +// Trigger an error if MCGEN_EVENTSETINFORMATION is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTSETINFORMATION) + MCGEN_EVENTSETINFORMATION_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTSETINFORMATION is defined as a function-like macro: +typedef void MCGEN_EVENTSETINFORMATION_must_not_be_a_functionLike_macro_MCGEN_EVENTSETINFORMATION; +typedef int _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_must_not_be_a_functionLike_macro_, MCGEN_EVENTSETINFORMATION); + +// Validate MCGEN_EVENTWRITETRANSFER: + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTWRITETRANSFER); + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTWRITETRANSFER) + MCGEN_EVENTWRITETRANSFER_must_not_be_redefined_between_headers;; + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is defined as a function-like macro: +typedef void MCGEN_EVENTWRITETRANSFER_must_not_be_a_functionLike_macro_MCGEN_EVENTWRITETRANSFER; +typedef int _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_must_not_be_a_functionLike_macro_, MCGEN_EVENTWRITETRANSFER); + +#ifndef McGenEventWrite_def +#define McGenEventWrite_def + +// This macro is for use by MC-generated code and should not be used directly. 
+#define McGenEventWrite _mcgen_PASTE2(McGenEventWrite_, MCGEN_EVENTWRITETRANSFER) + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventWrite( + _In_ PMCGEN_TRACE_CONTEXT Context, + _In_ PCEVENT_DESCRIPTOR Descriptor, + _In_opt_ LPCGUID ActivityId, + _In_range_(1, 128) ULONG EventDataCount, + _Pre_cap_(EventDataCount) EVENT_DATA_DESCRIPTOR* EventData + ) +{ + const USHORT UNALIGNED* Traits; + + // Some customized MCGEN_EVENTWRITETRANSFER macros might ignore ActivityId. + UNREFERENCED_PARAMETER(ActivityId); + + Traits = (const USHORT UNALIGNED*)(UINT_PTR)Context->Logger; + + if (Traits == NULL) { + EventData[0].Ptr = 0; + EventData[0].Size = 0; + EventData[0].Reserved = 0; + } else { + EventData[0].Ptr = (ULONG_PTR)Traits; + EventData[0].Size = *Traits; + EventData[0].Reserved = 2; // EVENT_DATA_DESCRIPTOR_TYPE_PROVIDER_METADATA + } + + return MCGEN_EVENTWRITETRANSFER( + Context->RegistrationHandle, + Descriptor, + ActivityId, + NULL, + EventDataCount, + EventData); +} +#endif // McGenEventWrite_def + +#if !defined(McGenEventRegisterUnregister) +#define McGenEventRegisterUnregister + +// This macro is for use by MC-generated code and should not be used directly. +#define McGenEventRegister _mcgen_PASTE2(McGenEventRegister_, MCGEN_EVENTREGISTER) + +#pragma warning(push) +#pragma warning(disable:6103) +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventRegister( + _In_ LPCGUID ProviderId, + _In_opt_ _mcgen_PENABLECALLBACK EnableCallback, + _In_opt_ PVOID CallbackContext, + _Inout_ PREGHANDLE RegHandle + ) +/*++ + +Routine Description: + + This function registers the provider with ETW. + +Arguments: + + ProviderId - Provider ID to register with ETW. + + EnableCallback - Callback to be used. + + CallbackContext - Context for the callback. + + RegHandle - Pointer to registration handle. + +Remarks: + + Should not be called if the provider is already registered (i.e. should not + be called if *RegHandle != 0). Repeatedly registering a provider is a bug + and may indicate a race condition. However, for compatibility with previous + behavior, this function will return SUCCESS in this case. + +--*/ +{ + ULONG Error; + + if (*RegHandle != 0) + { + Error = 0; // ERROR_SUCCESS + } + else + { + Error = MCGEN_EVENTREGISTER(ProviderId, EnableCallback, CallbackContext, RegHandle); + } + + return Error; +} +#pragma warning(pop) + +// This macro is for use by MC-generated code and should not be used directly. +#define McGenEventUnregister _mcgen_PASTE2(McGenEventUnregister_, MCGEN_EVENTUNREGISTER) + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventUnregister(_Inout_ PREGHANDLE RegHandle) +/*++ + +Routine Description: + + Unregister from ETW and set *RegHandle = 0. + +Arguments: + + RegHandle - the pointer to the provider registration handle + +Remarks: + + If provider has not been registered (i.e. if *RegHandle == 0), + return SUCCESS. It is safe to call McGenEventUnregister even if the + call to McGenEventRegister returned an error. 
+ +--*/ +{ + ULONG Error; + + if(*RegHandle == 0) + { + Error = 0; // ERROR_SUCCESS + } + else + { + Error = MCGEN_EVENTUNREGISTER(*RegHandle); + *RegHandle = (REGHANDLE)0; + } + + return Error; +} + +#endif // McGenEventRegisterUnregister + +#ifndef _mcgen_EVENT_BIT_SET + #if defined(_M_IX86) || defined(_M_X64) + // This macro is for use by MC-generated code and should not be used directly. + #define _mcgen_EVENT_BIT_SET(EnableBits, BitPosition) ((((const unsigned char*)EnableBits)[BitPosition >> 3] & (1u << (BitPosition & 7))) != 0) + #else // CPU type + // This macro is for use by MC-generated code and should not be used directly. + #define _mcgen_EVENT_BIT_SET(EnableBits, BitPosition) ((EnableBits[BitPosition >> 5] & (1u << (BitPosition & 31))) != 0) + #endif // CPU type +#endif // _mcgen_EVENT_BIT_SET + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// Provider "microsoft-windows-mimalloc" event count 2 +//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +// Provider GUID = 138f4dbb-ee04-4899-aa0a-572ad4475779 +EXTERN_C __declspec(selectany) const GUID ETW_MI_Provider = {0x138f4dbb, 0xee04, 0x4899, {0xaa, 0x0a, 0x57, 0x2a, 0xd4, 0x47, 0x57, 0x79}}; + +#ifndef ETW_MI_Provider_Traits +#define ETW_MI_Provider_Traits NULL +#endif // ETW_MI_Provider_Traits + +// +// Event Descriptors +// +EXTERN_C __declspec(selectany) const EVENT_DESCRIPTOR ETW_MI_ALLOC = {0x64, 0x1, 0x0, 0x4, 0x0, 0x0, 0x0}; +#define ETW_MI_ALLOC_value 0x64 +EXTERN_C __declspec(selectany) const EVENT_DESCRIPTOR ETW_MI_FREE = {0x65, 0x1, 0x0, 0x4, 0x0, 0x0, 0x0}; +#define ETW_MI_FREE_value 0x65 + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// Event Enablement Bits +// These variables are for use by MC-generated code and should not be used directly. +// +EXTERN_C __declspec(selectany) DECLSPEC_CACHEALIGN ULONG microsoft_windows_mimallocEnableBits[1]; +EXTERN_C __declspec(selectany) const ULONGLONG microsoft_windows_mimallocKeywords[1] = {0x0}; +EXTERN_C __declspec(selectany) const unsigned char microsoft_windows_mimallocLevels[1] = {4}; + +// +// Provider context +// +EXTERN_C __declspec(selectany) MCGEN_TRACE_CONTEXT ETW_MI_Provider_Context = {0, (ULONG_PTR)ETW_MI_Provider_Traits, 0, 0, 0, 0, 0, 0, 1, microsoft_windows_mimallocEnableBits, microsoft_windows_mimallocKeywords, microsoft_windows_mimallocLevels}; + +// +// Provider REGHANDLE +// +#define microsoft_windows_mimallocHandle (ETW_MI_Provider_Context.RegistrationHandle) + +// +// This macro is set to 0, indicating that the EventWrite[Name] macros do not +// have an Activity parameter. This is controlled by the -km and -um options. +// +#define ETW_MI_Provider_EventWriteActivity 0 + +// +// Register with ETW using the control GUID specified in the manifest. +// Invoke this macro during module initialization (i.e. program startup, +// DLL process attach, or driver load) to initialize the provider. +// Note that if this function returns an error, the error means that +// will not work, but no action needs to be taken -- even if EventRegister +// returns an error, it is generally safe to use EventWrite and +// EventUnregister macros (they will be no-ops if EventRegister failed). 
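
As a rough, stand-alone illustration (not itself part of this patch) of how the registration, write, and unregister macros defined below are meant to fit together: register the provider once at startup, emit an allocation/free event pair (event ids 100 and 101 per the descriptors above), and unregister before the module unloads. The include name used here and the advapi32 link step are assumptions for the sketch, not taken from the series.

    /* illustrative only: exercise the generated mimalloc ETW provider macros */
    #define WIN32_LEAN_AND_MEAN
    #include <windows.h>            /* provides the Windows/ETW types the header relies on */
    #include "mimalloc-etw-gen.h"   /* assumed include name; link against advapi32.lib */
    #include <stdlib.h>

    int main(void) {
      EventRegistermicrosoft_windows_mimalloc();    /* writes become no-ops if this fails */
      void* p = malloc(64);
      EventWriteETW_MI_ALLOC((UINT64)p, 64);        /* event 100: address, size */
      EventWriteETW_MI_FREE((UINT64)p, 64);         /* event 101: address, size */
      free(p);
      EventUnregistermicrosoft_windows_mimalloc();  /* required before DLL/driver unload */
      return 0;
    }
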
+// +#ifndef EventRegistermicrosoft_windows_mimalloc +#define EventRegistermicrosoft_windows_mimalloc() McGenEventRegister(&ETW_MI_Provider, McGenControlCallbackV2, &ETW_MI_Provider_Context, µsoft_windows_mimallocHandle) +#endif + +// +// Register with ETW using a specific control GUID (i.e. a GUID other than what +// is specified in the manifest). Advanced scenarios only. +// +#ifndef EventRegisterByGuidmicrosoft_windows_mimalloc +#define EventRegisterByGuidmicrosoft_windows_mimalloc(Guid) McGenEventRegister(&(Guid), McGenControlCallbackV2, &ETW_MI_Provider_Context, µsoft_windows_mimallocHandle) +#endif + +// +// Unregister with ETW and close the provider. +// Invoke this macro during module shutdown (i.e. program exit, DLL process +// detach, or driver unload) to unregister the provider. +// Note that you MUST call EventUnregister before DLL or driver unload +// (not optional): failure to unregister a provider before DLL or driver unload +// will result in crashes. +// +#ifndef EventUnregistermicrosoft_windows_mimalloc +#define EventUnregistermicrosoft_windows_mimalloc() McGenEventUnregister(µsoft_windows_mimallocHandle) +#endif + +// +// MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION macro: +// Define this macro to enable support for caller-allocated provider context. +// +#ifdef MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION + +// +// Advanced scenarios: Caller-allocated provider context. +// Use when multiple differently-configured provider handles are needed, +// e.g. for container-aware drivers, one context per container. +// +// Usage: +// +// - Caller enables the feature before including this header, e.g. +// #define MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION 1 +// - Caller allocates memory, e.g. pContext = malloc(sizeof(McGenContext_microsoft_windows_mimalloc)); +// - Caller registers the provider, e.g. EventRegistermicrosoft_windows_mimalloc_ForContext(pContext); +// - Caller writes events, e.g. EventWriteMyEvent_ForContext(pContext, ...); +// - Caller unregisters, e.g. EventUnregistermicrosoft_windows_mimalloc_ForContext(pContext); +// - Caller frees memory, e.g. free(pContext); +// + +typedef struct tagMcGenContext_microsoft_windows_mimalloc { + // The fields of this structure are subject to change and should + // not be accessed directly. To access the provider's REGHANDLE, + // use microsoft_windows_mimallocHandle_ForContext(pContext). + MCGEN_TRACE_CONTEXT Context; + ULONG EnableBits[1]; +} McGenContext_microsoft_windows_mimalloc; + +#define EventRegistermicrosoft_windows_mimalloc_ForContext(pContext) _mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)(&ETW_MI_Provider, pContext) +#define EventRegisterByGuidmicrosoft_windows_mimalloc_ForContext(Guid, pContext) _mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)(&(Guid), pContext) +#define EventUnregistermicrosoft_windows_mimalloc_ForContext(pContext) McGenEventUnregister(&(pContext)->Context.RegistrationHandle) + +// +// Provider REGHANDLE for caller-allocated context. +// +#define microsoft_windows_mimallocHandle_ForContext(pContext) ((pContext)->Context.RegistrationHandle) + +// This function is for use by MC-generated code and should not be used directly. +// Initialize and register the caller-allocated context. 
+__inline +ULONG __stdcall +_mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)( + _In_ LPCGUID pProviderId, + _Out_ McGenContext_microsoft_windows_mimalloc* pContext) +{ + RtlZeroMemory(pContext, sizeof(*pContext)); + pContext->Context.Logger = (ULONG_PTR)ETW_MI_Provider_Traits; + pContext->Context.EnableBitsCount = 1; + pContext->Context.EnableBitMask = pContext->EnableBits; + pContext->Context.EnableKeyWords = microsoft_windows_mimallocKeywords; + pContext->Context.EnableLevel = microsoft_windows_mimallocLevels; + return McGenEventRegister( + pProviderId, + McGenControlCallbackV2, + &pContext->Context, + &pContext->Context.RegistrationHandle); +} + +// This function is for use by MC-generated code and should not be used directly. +// Trigger a compile error if called with the wrong parameter type. +FORCEINLINE +_Ret_ McGenContext_microsoft_windows_mimalloc* +_mcgen_CheckContextType_microsoft_windows_mimalloc(_In_ McGenContext_microsoft_windows_mimalloc* pContext) +{ + return pContext; +} + +#endif // MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION + +// +// Enablement check macro for event "ETW_MI_ALLOC" +// +#define EventEnabledETW_MI_ALLOC() _mcgen_EVENT_BIT_SET(microsoft_windows_mimallocEnableBits, 0) +#define EventEnabledETW_MI_ALLOC_ForContext(pContext) _mcgen_EVENT_BIT_SET(_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->EnableBits, 0) + +// +// Event write macros for event "ETW_MI_ALLOC" +// +#define EventWriteETW_MI_ALLOC(Address, Size) \ + MCGEN_EVENT_ENABLED(ETW_MI_ALLOC) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&ETW_MI_Provider_Context, &ETW_MI_ALLOC, Address, Size) : 0 +#define EventWriteETW_MI_ALLOC_AssumeEnabled(Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&ETW_MI_Provider_Context, &ETW_MI_ALLOC, Address, Size) +#define EventWriteETW_MI_ALLOC_ForContext(pContext, Address, Size) \ + MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, ETW_MI_ALLOC) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&(pContext)->Context, &ETW_MI_ALLOC, Address, Size) : 0 +#define EventWriteETW_MI_ALLOC_ForContextAssumeEnabled(pContext, Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->Context, &ETW_MI_ALLOC, Address, Size) + +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC _mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER) + +// +// Enablement check macro for event "ETW_MI_FREE" +// +#define EventEnabledETW_MI_FREE() _mcgen_EVENT_BIT_SET(microsoft_windows_mimallocEnableBits, 0) +#define EventEnabledETW_MI_FREE_ForContext(pContext) _mcgen_EVENT_BIT_SET(_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->EnableBits, 0) + +// +// Event write macros for event "ETW_MI_FREE" +// +#define EventWriteETW_MI_FREE(Address, Size) \ + MCGEN_EVENT_ENABLED(ETW_MI_FREE) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&ETW_MI_Provider_Context, &ETW_MI_FREE, Address, Size) : 0 +#define EventWriteETW_MI_FREE_AssumeEnabled(Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&ETW_MI_Provider_Context, &ETW_MI_FREE, Address, Size) +#define EventWriteETW_MI_FREE_ForContext(pContext, Address, Size) \ + MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, ETW_MI_FREE) \ + ? 
_mcgen_TEMPLATE_FOR_ETW_MI_FREE(&(pContext)->Context, &ETW_MI_FREE, Address, Size) : 0 +#define EventWriteETW_MI_FREE_ForContextAssumeEnabled(pContext, Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->Context, &ETW_MI_FREE, Address, Size) + +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_TEMPLATE_FOR_ETW_MI_FREE _mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER) + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// Template Functions +// + +// +// Function for template "ETW_CUSTOM_HEAP_ALLOC_DATA" (and possibly others). +// This function is for use by MC-generated code and should not be used directly. +// +#ifndef McTemplateU0xx_def +#define McTemplateU0xx_def +ETW_INLINE +ULONG +_mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER)( + _In_ PMCGEN_TRACE_CONTEXT Context, + _In_ PCEVENT_DESCRIPTOR Descriptor, + _In_ const unsigned __int64 _Arg0, + _In_ const unsigned __int64 _Arg1 + ) +{ +#define McTemplateU0xx_ARGCOUNT 2 + + EVENT_DATA_DESCRIPTOR EventData[McTemplateU0xx_ARGCOUNT + 1]; + + EventDataDescCreate(&EventData[1],&_Arg0, sizeof(const unsigned __int64) ); + + EventDataDescCreate(&EventData[2],&_Arg1, sizeof(const unsigned __int64) ); + + return McGenEventWrite(Context, Descriptor, NULL, McTemplateU0xx_ARGCOUNT + 1, EventData); +} +#endif // McTemplateU0xx_def + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +#if defined(__cplusplus) +} +#endif diff --git a/include/mimalloc-etw-gen.man b/include/mimalloc-etw-gen.man new file mode 100644 index 0000000000000000000000000000000000000000..9ef58029233f5296ce71f657e46e068ef2d16418 GIT binary patch literal 3950 zcmeH~TTc@~6vxlAiQi%6*=?zyV6#dHLL{MTq(mP~$kK(vBp$7`^o?Hg#Z5!5#ou-Dw<5J|RS>^SNDo1B-C{k&d z{*Ih*qErEXQD4+~w&avuMYw0$3NH21DbzV=>r!QmQfFXQq%E^3gZBi=3h!!P6}*b6 zD$nrpvaGUNmUW)T7K;?x3>?Lq^HIf+B^DPKJGJZIBhfr4^wo#x%X%Hk!jrf4g7Z#- zuLjsR?v+U_E>>kM19%5`d>|`4`^2Gr%){A<`Ru0|G7_oJ+dGyoc;C<}pMdIRl{5;AVfa^LBY*I&i@?N&gl1psp zVw;O+4G+6}WZE60maAX%%=y;Ex{ha4;-^)sk@lThB`Z%3z1|XCJ9C(;m_J|zkFve-J?G} z#{2c=cm(Eko5J^#8FN$4*%CAheaE?v+$)eImh~~Q9_^f)bgyLCc{0lo?V3$0_iR{; zzP1jmx^lkVR*g>kR6a(jcWRwo{krJTky0ci)Q^50w&7VU^9$rvq-ZtAx0c}f$8FyS z<##Y@{{KH#Ypze`?P8vfO8kpt?cY~;5&7wHd&t>`oGo&sobB_vv{s1w&scnaO|Ova a?+rcGzA^hhqLVtGu0d~0k>&qOI=(+V`!f#! 
literal 0 HcmV?d00001 diff --git a/include/mimalloc-etw.h b/include/mimalloc-etw.h new file mode 100644 index 00000000..3c894056 --- /dev/null +++ b/include/mimalloc-etw.h @@ -0,0 +1,4 @@ +#pragma once +#include +#include "mimalloc-etw-gen.h" + diff --git a/include/mimalloc-track.h b/include/mimalloc-track.h index 272ca1b8..5e050512 100644 --- a/include/mimalloc-track.h +++ b/include/mimalloc-track.h @@ -71,6 +71,18 @@ defined, undefined, or not accessible at all: #define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) #define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size) +#elif MI_ETW +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_TOOL "ETW" + +#include "mimalloc-etw.h" + +#define mi_track_malloc_size(p,reqsize,size,zero) EventWriteETW_MI_ALLOC((UINT64)p, size) +#define mi_track_free_size(p,size) EventWriteETW_MI_FREE((UINT64)p, size) +#define mi_track_mem_defined(p,size) +#define mi_track_mem_undefined(p,size) +#define mi_track_mem_noaccess(p,size) + #else #define MI_TRACK_ENABLED 0 diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 9b3f5972..f8d891cb 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -32,6 +32,8 @@ terms of the MIT license. A copy of the license can be found in the file // Define MI_TRACK_ to enable tracking support // #define MI_TRACK_VALGRIND 1 // #define MI_TRACK_ASAN 1 +// Define MI_ETW to enable ETW provider + #define MI_ETW 1 // Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). // #define MI_STAT 1 @@ -60,7 +62,7 @@ terms of the MIT license. A copy of the license can be found in the file // Reserve extra padding at the end of each block to be more resilient against heap block overflows. // The padding can detect buffer overflow on free. -#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || MI_TRACK_VALGRIND || MI_TRACK_ASAN) +#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_ETW) #define MI_PADDING 1 #endif diff --git a/include/mimalloc.wprp b/include/mimalloc.wprp new file mode 100644 index 00000000..7f284e9d --- /dev/null +++ b/include/mimalloc.wprp @@ -0,0 +1,56 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/init.c b/src/init.c index d73c1e1e..5574548e 100644 --- a/src/init.c +++ b/src/init.c @@ -11,6 +11,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #include // memcpy, memset #include // atexit + // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { 0, false, false, false, false, @@ -549,6 +550,10 @@ void mi_process_init(void) mi_attr_noexcept { mi_reserve_os_memory((size_t)ksize*MI_KiB, true, true); } } + +#ifdef MI_ETW + EventRegistermicrosoft_windows_mimalloc(); +#endif } // Called when the process is done (through `at_exit`) From 1a99efc671bd040b40b558f15d9906b2d24aa653 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 16 Mar 2023 20:08:43 -0700 Subject: [PATCH 25/42] integrate ETW windows event tracing into mimalloc as another track tool --- CMakeLists.txt | 16 ++++++ include/mimalloc-etw.h | 4 -- include/mimalloc-track.h | 48 +++++++++++++----- include/mimalloc-types.h | 5 +- src/init.c | 5 +- src/prim/readme.md | 6 ++- .../prim/windows/etw.h | 0 .../prim/windows/etw.man | Bin 3950 -> 3926 bytes {include => src/prim/windows}/mimalloc.wprp | 7 ++- src/prim/windows/readme.md | 17 +++++++ 10 files changed, 80 insertions(+), 28 deletions(-) delete mode 100644 include/mimalloc-etw.h rename include/mimalloc-etw-gen.h => src/prim/windows/etw.h (100%) rename include/mimalloc-etw-gen.man => src/prim/windows/etw.man (97%) rename {include => src/prim/windows}/mimalloc.wprp (91%) create mode 100644 src/prim/windows/readme.md diff --git a/CMakeLists.txt b/CMakeLists.txt index 28dfe830..7aba5553 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,7 @@ option(MI_XMALLOC "Enable abort() call on memory allocation failure by option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF) option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF) option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF) +option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF) option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON) @@ -165,6 +166,21 @@ if(MI_TRACK_ASAN) endif() endif() +if(MI_TRACK_ETW) + if NOT WIN32 + set(MI_TRACK_ETW OFF) + message(WARNING "Can only enable ETW support on Windows (MI_TRACK_ETW=OFF)") + endif() + if (MI_TRACK_VALGRIND OR MI_TRACK_ASAN) + set(MI_TRACK_ETW OFF) + message(WARNING "Cannot enable ETW support with also Valgrind or ASAN support enabled (MI_TRACK_ETW=OFF)") + endif() + if(MI_TRACK_ETW) + message(STATUS "Compile with Windows event tracing support (MI_TRACK_ETW=ON)") + list(APPEND mi_defines MI_TRACK_ETW=1) + endif() +endif() + if(MI_SEE_ASM) message(STATUS "Generate assembly listings (MI_SEE_ASM=ON)") list(APPEND mi_cflags -save-temps) diff --git a/include/mimalloc-etw.h b/include/mimalloc-etw.h deleted file mode 100644 index 3c894056..00000000 --- a/include/mimalloc-etw.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once -#include -#include "mimalloc-etw-gen.h" - diff --git a/include/mimalloc-track.h b/include/mimalloc-track.h index 5e050512..f78e8daa 100644 --- a/include/mimalloc-track.h +++ b/include/mimalloc-track.h @@ -27,10 +27,12 @@ Optional: #define mi_track_align(p,alignedp,offset,size) #define mi_track_resize(p,oldsize,newsize) + #define mi_track_init() The `mi_track_align` is called right after a `mi_track_malloc` for aligned pointers in a block. 
The corresponding `mi_track_free` still uses the block start pointer and original size (corresponding to the `mi_track_malloc`). The `mi_track_resize` is currently unused but could be called on reallocations within a block. +`mi_track_init` is called at program start. The following macros are for tools like asan and valgrind to track whether memory is defined, undefined, or not accessible at all: @@ -42,6 +44,7 @@ defined, undefined, or not accessible at all: -------------------------------------------------------------------------------------------------------*/ #if MI_TRACK_VALGRIND +// valgrind tool #define MI_TRACK_ENABLED 1 #define MI_TRACK_HEAP_DESTROY 1 // track free of individual blocks on heap_destroy @@ -58,6 +61,7 @@ defined, undefined, or not accessible at all: #define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size) #elif MI_TRACK_ASAN +// address sanitizer #define MI_TRACK_ENABLED 1 #define MI_TRACK_HEAP_DESTROY 0 @@ -71,19 +75,23 @@ defined, undefined, or not accessible at all: #define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) #define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size) -#elif MI_ETW -#define MI_TRACK_ENABLED 1 -#define MI_TRACK_TOOL "ETW" +#elif MI_TRACK_ETW +// windows event tracing -#include "mimalloc-etw.h" +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_TOOL "ETW" -#define mi_track_malloc_size(p,reqsize,size,zero) EventWriteETW_MI_ALLOC((UINT64)p, size) -#define mi_track_free_size(p,size) EventWriteETW_MI_FREE((UINT64)p, size) -#define mi_track_mem_defined(p,size) -#define mi_track_mem_undefined(p,size) -#define mi_track_mem_noaccess(p,size) +#define WIN32_LEAN_AND_MEAN +#include +#include "../src/prim/windows/etw.h" + +#define mi_track_init() EventRegistermicrosoft_windows_mimalloc(); +#define mi_track_malloc_size(p,reqsize,size,zero) EventWriteETW_MI_ALLOC((UINT64)(p), size) +#define mi_track_free_size(p,size) EventWriteETW_MI_FREE((UINT64)(p), size) #else +// no tracking #define MI_TRACK_ENABLED 0 #define MI_TRACK_HEAP_DESTROY 0 @@ -91,11 +99,6 @@ defined, undefined, or not accessible at all: #define mi_track_malloc_size(p,reqsize,size,zero) #define mi_track_free_size(p,_size) -#define mi_track_align(p,alignedp,offset,size) -#define mi_track_resize(p,oldsize,newsize) -#define mi_track_mem_defined(p,size) -#define mi_track_mem_undefined(p,size) -#define mi_track_mem_noaccess(p,size) #endif @@ -110,6 +113,23 @@ defined, undefined, or not accessible at all: #define mi_track_align(p,alignedp,offset,size) mi_track_mem_noaccess(p,offset) #endif +#ifndef mi_track_init +#define mi_track_init() +#endif + +#ifndef mi_track_mem_defined +#define mi_track_mem_defined(p,size) +#endif + +#ifndef mi_track_mem_undefined +#define mi_track_mem_undefined(p,size) +#endif + +#ifndef mi_track_mem_noaccess +#define mi_track_mem_noaccess(p,size) +#endif + + #if MI_PADDING #define mi_track_malloc(p,reqsize,zero) \ if ((p)!=NULL) { \ diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index f8d891cb..ebf764ab 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -32,8 +32,7 @@ terms of the MIT license. A copy of the license can be found in the file // Define MI_TRACK_ to enable tracking support // #define MI_TRACK_VALGRIND 1 // #define MI_TRACK_ASAN 1 -// Define MI_ETW to enable ETW provider - #define MI_ETW 1 +#define MI_TRACK_ETW 1 // Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). 
// #define MI_STAT 1 @@ -62,7 +61,7 @@ terms of the MIT license. A copy of the license can be found in the file // Reserve extra padding at the end of each block to be more resilient against heap block overflows. // The padding can detect buffer overflow on free. -#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_ETW) +#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || (MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_TRACK_ETW)) #define MI_PADDING 1 #endif diff --git a/src/init.c b/src/init.c index 5574548e..495d26fd 100644 --- a/src/init.c +++ b/src/init.c @@ -534,6 +534,7 @@ void mi_process_init(void) mi_attr_noexcept { #endif mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) + mi_track_init(); if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get_clamp(mi_option_reserve_huge_os_pages, 0, 128*1024); @@ -550,10 +551,6 @@ void mi_process_init(void) mi_attr_noexcept { mi_reserve_os_memory((size_t)ksize*MI_KiB, true, true); } } - -#ifdef MI_ETW - EventRegistermicrosoft_windows_mimalloc(); -#endif } // Called when the process is done (through `at_exit`) diff --git a/src/prim/readme.md b/src/prim/readme.md index 14248496..eb02f274 100644 --- a/src/prim/readme.md +++ b/src/prim/readme.md @@ -1,6 +1,8 @@ +## Portability Primitives + This is the portability layer where all primitives needed from the OS are defined. - `prim.h`: API definition -- `prim.c`: Selects one of `prim-unix.c`, `prim-wasi.c`, or `prim-windows.c` depending on the host platform. +- `prim.c`: Selects one of `unix/prim.c`, `wasi/prim.c`, or `windows/prim.c` depending on the host platform. -Note: still work in progress, there may be other places in the sources that still depend on OS ifdef's. \ No newline at end of file +Note: still work in progress, there may still be places in the sources that still depend on OS ifdef's. \ No newline at end of file diff --git a/include/mimalloc-etw-gen.h b/src/prim/windows/etw.h similarity index 100% rename from include/mimalloc-etw-gen.h rename to src/prim/windows/etw.h diff --git a/include/mimalloc-etw-gen.man b/src/prim/windows/etw.man similarity index 97% rename from include/mimalloc-etw-gen.man rename to src/prim/windows/etw.man index 9ef58029233f5296ce71f657e46e068ef2d16418..cfd1f8a9eaacd50af63f1e28f9540aa88c20f90c 100644 GIT binary patch delta 26 hcmaDScTH}>F8)-85{7aHJ%(I{M20*Dg^dq;`2cy32w?yK delta 50 zcmca6_fBrYF7;f7Oom*BM1~w7%x6euh-XM;C}AjP&}B#mvho=8z_NK8PxkTw0BG(F ARR910 diff --git a/include/mimalloc.wprp b/src/prim/windows/mimalloc.wprp similarity index 91% rename from include/mimalloc.wprp rename to src/prim/windows/mimalloc.wprp index 7f284e9d..b00cd7ad 100644 --- a/include/mimalloc.wprp +++ b/src/prim/windows/mimalloc.wprp @@ -29,7 +29,12 @@ - + + + + + + diff --git a/src/prim/windows/readme.md b/src/prim/windows/readme.md new file mode 100644 index 00000000..70292231 --- /dev/null +++ b/src/prim/windows/readme.md @@ -0,0 +1,17 @@ +## Primitives: + +- `prim.c` contains Windows primitives for OS allocation. + +## Event Tracing for Windows (ETW) + +- `etw.h` is generated from `etw.man` which contains the manifest for mimalloc events. + (100 is an allocation, 101 is for a free) + +- `mimalloc.wprp` is a profile for the Windows Performance Recorder (WPR). + In an admin prompt, you can use: + ``` + > wpr -start src\prim\windows\mimalloc.wprp -filemode + > + > wpr -stop test.etl + ``` + and then open `test.etl` in the Windows Performance Analyzer (WPA). 
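
For these events to show up at all, mimalloc itself presumably has to be configured with the ETW tracking option introduced in this series; a typical out-of-tree build would look roughly like the following (the build folder name is illustrative):

    > cd out/release
    > cmake ../.. -DMI_TRACK_ETW=ON
    > cmake --build .

after which the `wpr` recording steps above apply to programs linked against that build.
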
\ No newline at end of file From 63f88cb43d1b381222f8d371646e75f535e86733 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 16 Mar 2023 20:10:46 -0700 Subject: [PATCH 26/42] rename --- src/prim/windows/{mimalloc.wprp => etw-mimalloc.wprp} | 0 src/prim/windows/readme.md | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename src/prim/windows/{mimalloc.wprp => etw-mimalloc.wprp} (100%) diff --git a/src/prim/windows/mimalloc.wprp b/src/prim/windows/etw-mimalloc.wprp similarity index 100% rename from src/prim/windows/mimalloc.wprp rename to src/prim/windows/etw-mimalloc.wprp diff --git a/src/prim/windows/readme.md b/src/prim/windows/readme.md index 70292231..217c3d17 100644 --- a/src/prim/windows/readme.md +++ b/src/prim/windows/readme.md @@ -7,10 +7,10 @@ - `etw.h` is generated from `etw.man` which contains the manifest for mimalloc events. (100 is an allocation, 101 is for a free) -- `mimalloc.wprp` is a profile for the Windows Performance Recorder (WPR). +- `etw-mimalloc.wprp` is a profile for the Windows Performance Recorder (WPR). In an admin prompt, you can use: ``` - > wpr -start src\prim\windows\mimalloc.wprp -filemode + > wpr -start src\prim\windows\etw-mimalloc.wprp -filemode > > wpr -stop test.etl ``` From 3ebcc0bac45a1e270f1a1cc03e9af1b9d91fce33 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 16 Mar 2023 20:13:21 -0700 Subject: [PATCH 27/42] fix syntax in cmakelists --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7aba5553..68b7cab4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -167,7 +167,7 @@ if(MI_TRACK_ASAN) endif() if(MI_TRACK_ETW) - if NOT WIN32 + if(NOT WIN32) set(MI_TRACK_ETW OFF) message(WARNING "Can only enable ETW support on Windows (MI_TRACK_ETW=OFF)") endif() From 17a20f280b86d087f5ed84f5c23c1c5ab1d7a51e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 16 Mar 2023 20:16:31 -0700 Subject: [PATCH 28/42] dont track ETW by default --- include/mimalloc-types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index ebf764ab..3577b23a 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -32,7 +32,7 @@ terms of the MIT license. A copy of the license can be found in the file // Define MI_TRACK_ to enable tracking support // #define MI_TRACK_VALGRIND 1 // #define MI_TRACK_ASAN 1 -#define MI_TRACK_ETW 1 +// #define MI_TRACK_ETW 1 // Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). // #define MI_STAT 1 From 85a2bb5c608a451b143dc3dc27a278924229f0e4 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 19 Mar 2023 19:11:43 -0700 Subject: [PATCH 29/42] update process info primitive api --- src/prim/prim.c | 2 +- src/prim/prim.h | 16 +++++++++++++--- src/prim/unix/prim.c | 36 ++++++++++++++---------------------- src/prim/wasi/prim.c | 12 ++++-------- src/prim/windows/prim.c | 16 ++++++++-------- src/stats.c | 36 +++++++++++++++++++----------------- 6 files changed, 59 insertions(+), 59 deletions(-) diff --git a/src/prim/prim.c b/src/prim/prim.c index eec13c48..109ab8e8 100644 --- a/src/prim/prim.c +++ b/src/prim/prim.c @@ -12,7 +12,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "windows/prim.c" // VirtualAlloc (Windows) #elif defined(__wasi__) #define MI_USE_SBRK -#include "wasi/prim.h" // memory-grow or sbrk (Wasm) +#include "wasi/prim.c" // memory-grow or sbrk (Wasm) #else #include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) #endif diff --git a/src/prim/prim.h b/src/prim/prim.h index 967c6698..3130d489 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -59,9 +59,18 @@ size_t _mi_prim_numa_node_count(void); mi_msecs_t _mi_prim_clock_now(void); // Return process information (only for statistics) -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, - size_t* current_rss, size_t* peak_rss, - size_t* current_commit, size_t* peak_commit, size_t* page_faults); +typedef struct mi_process_info_s { + mi_msecs_t elapsed; + mi_msecs_t utime; + mi_msecs_t stime; + size_t current_rss; + size_t peak_rss; + size_t current_commit; + size_t peak_commit; + size_t page_faults; +} mi_process_info_t; + +void _mi_prim_process_info(mi_process_info_t* pinfo); // Default stderr output. (only for warnings etc. with verbose enabled) // msg != NULL && _mi_strlen(msg) > 0 @@ -202,6 +211,7 @@ This is inlined here as it is on the fast path for allocation functions. On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a __thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures that the storage will always be available (allocated on the thread stacks). + On some platforms though we cannot use that when overriding `malloc` since the underlying TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. We try to circumvent this in an efficient way: diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index d1cd4301..1040c791 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -541,19 +541,15 @@ static mi_msecs_t timeval_secs(const struct timeval* tv) { return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); } -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +void _mi_prim_process_info(mi_process_info_t* pinfo) { struct rusage rusage; getrusage(RUSAGE_SELF, &rusage); - *utime = timeval_secs(&rusage.ru_utime); - *stime = timeval_secs(&rusage.ru_stime); + pinfo->utime = timeval_secs(&rusage.ru_utime); + pinfo->stime = timeval_secs(&rusage.ru_stime); #if !defined(__HAIKU__) - *page_faults = rusage.ru_majflt; -#endif - // estimate commit using our stats - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *current_rss = *current_commit; // estimate + pinfo->page_faults = rusage.ru_majflt; +#endif #if defined(__HAIKU__) // Haiku does not have (yet?) 
a way to // get these stats per process @@ -562,19 +558,20 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current ssize_t c; get_thread_info(find_thread(0), &tid); while (get_next_area_info(tid.team, &c, &mem) == B_OK) { - *peak_rss += mem.ram_size; + pinfo->peak_rss += mem.ram_size; } - *page_faults = 0; + pinfo->page_faults = 0; #elif defined(__APPLE__) - *peak_rss = rusage.ru_maxrss; // BSD reports in bytes + pinfo->peak_rss = rusage.ru_maxrss; // BSD reports in bytes struct mach_task_basic_info info; mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { - *current_rss = (size_t)info.resident_size; + pinfo->current_rss = (size_t)info.resident_size; } #else - *peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB + pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB #endif + // use defaults for commit } #else @@ -584,15 +581,10 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current #pragma message("define a way to get process info") #endif -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +void _mi_prim_process_info(mi_process_info_t* pinfo) { - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *peak_rss = *peak_commit; - *current_rss = *current_commit; - *page_faults = 0; - *utime = 0; - *stime = 0; + // use defaults + MI_UNUSED(pinfo); } #endif diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index b8ac1a1b..89c04d78 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -194,17 +194,13 @@ mi_msecs_t _mi_prim_clock_now(void) { // Process info //---------------------------------------------------------------- -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +void _mi_prim_process_info(mi_process_info_t* pinfo) { - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *peak_rss = *peak_commit; - *current_rss = *current_commit; - *page_faults = 0; - *utime = 0; - *stime = 0; + // use defaults + MI_UNUSED(pinfo); } + //---------------------------------------------------------------- // Output //---------------------------------------------------------------- diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 2fa445a1..1ce44a10 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -428,15 +428,15 @@ static mi_msecs_t filetime_msecs(const FILETIME* ftime) { typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; -void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +void _mi_prim_process_info(mi_process_info_t* pinfo) { FILETIME ct; FILETIME ut; FILETIME st; FILETIME et; GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); - *utime = filetime_msecs(&ut); - *stime = 
filetime_msecs(&st); + pinfo->utime = filetime_msecs(&ut); + pinfo->stime = filetime_msecs(&st); // load psapi on demand if (pGetProcessMemoryInfo == NULL) { @@ -452,11 +452,11 @@ void _mi_prim_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current if (pGetProcessMemoryInfo != NULL) { pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); } - *current_rss = (size_t)info.WorkingSetSize; - *peak_rss = (size_t)info.PeakWorkingSetSize; - *current_commit = (size_t)info.PagefileUsage; - *peak_commit = (size_t)info.PeakPagefileUsage; - *page_faults = (size_t)info.PageFaultCount; + pinfo->current_rss = (size_t)info.WorkingSetSize; + pinfo->peak_rss = (size_t)info.PeakWorkingSetSize; + pinfo->current_commit = (size_t)info.PagefileUsage; + pinfo->peak_commit = (size_t)info.PeakPagefileUsage; + pinfo->page_faults = (size_t)info.PageFaultCount; } //---------------------------------------------------------------- diff --git a/src/stats.c b/src/stats.c index c9b3bb95..8273740f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -430,21 +430,23 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept { - mi_msecs_t elapsed = _mi_clock_end(mi_process_start); - mi_msecs_t utime = 0; - mi_msecs_t stime = 0; - size_t current_rss0 = 0; - size_t peak_rss0 = 0; - size_t current_commit0 = 0; - size_t peak_commit0 = 0; - size_t page_faults0 = 0; - _mi_prim_process_info(&utime, &stime, ¤t_rss0, &peak_rss0, ¤t_commit0, &peak_commit0, &page_faults0); - if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX)); - if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX)); - if (system_msecs!=NULL) *system_msecs = (stime < 0 ? 0 : (stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)stime : PTRDIFF_MAX)); - if (current_rss!=NULL) *current_rss = current_rss0; - if (peak_rss!=NULL) *peak_rss = peak_rss0; - if (current_commit!=NULL) *current_commit = current_commit0; - if (peak_commit!=NULL) *peak_commit = peak_commit0; - if (page_faults!=NULL) *page_faults = page_faults0; + mi_process_info_t pinfo = { 0 }; + pinfo.elapsed = _mi_clock_end(mi_process_start); + pinfo.utime = 0; + pinfo.stime = 0; + pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + pinfo.current_rss = pinfo.current_commit; + pinfo.peak_rss = pinfo.peak_commit; + pinfo.page_faults = 0; + + _mi_prim_process_info(&pinfo); + if (elapsed_msecs!=NULL) *elapsed_msecs = (pinfo.elapsed < 0 ? 0 : (pinfo.elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.elapsed : PTRDIFF_MAX)); + if (user_msecs!=NULL) *user_msecs = (pinfo.utime < 0 ? 0 : (pinfo.utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.utime : PTRDIFF_MAX)); + if (system_msecs!=NULL) *system_msecs = (pinfo.stime < 0 ? 0 : (pinfo.stime < (mi_msecs_t)PTRDIFF_MAX ? 
(size_t)pinfo.stime : PTRDIFF_MAX)); + if (current_rss!=NULL) *current_rss = pinfo.current_rss; + if (peak_rss!=NULL) *peak_rss = pinfo.peak_rss; + if (current_commit!=NULL) *current_commit = pinfo.current_commit; + if (peak_commit!=NULL) *peak_commit = pinfo.peak_commit; + if (page_faults!=NULL) *page_faults = pinfo.page_faults; } From 6ae6c427001e5b01f4f62d97c0b8c1cdca8c2888 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 19 Mar 2023 20:21:20 -0700 Subject: [PATCH 30/42] simplify primitives API --- src/os.c | 27 +++++++++++++------ src/prim/prim.h | 7 ++--- src/prim/unix/prim.c | 60 ++++++++++++++++++++++------------------- src/prim/wasi/prim.c | 19 ++++++++----- src/prim/windows/prim.c | 28 ++++++++++--------- 5 files changed, 83 insertions(+), 58 deletions(-) diff --git a/src/os.c b/src/os.c index a91bbb91..85c3652f 100644 --- a/src/os.c +++ b/src/os.c @@ -146,7 +146,10 @@ static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats MI_UNUSED(tld_stats); mi_assert_internal((size % _mi_os_page_size()) == 0); if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) - _mi_prim_free(addr, size); + int err = _mi_prim_free(addr, size); + if (err != 0) { + _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); + } mi_stats_t* stats = &_mi_stats_main; if (was_committed) { _mi_stat_decrease(&stats->committed, size); } _mi_stat_decrease(&stats->reserved, size); @@ -174,7 +177,11 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo if (!commit) allow_large = false; if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning - void* p = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large); + void* p = NULL; + int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, &p); + if (err != 0) { + _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large); + } /* if (commit && allow_large) { p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); @@ -211,7 +218,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // if not aligned, free it, overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { mi_os_mem_free(p, size, commit, stats); - _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (%zu bytes, address: %p, alignment: %zu, commit: %d)\n", size, p, alignment, commit); + _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; @@ -368,7 +375,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ int err = _mi_prim_commit(start, csize, commit); if (err != 0) { - _mi_warning_message("%s error: start: %p, csize: 0x%zx, err: %i\n", commit ? "commit" : "decommit", start, csize, err); + _mi_warning_message("cannot %s OS memory (error: %d (0x%d), address: %p, size: 0x%zx bytes)\n", commit ? 
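The `mi_process_info` implementation above now seeds a `mi_process_info_t` with defaults (commit figures taken from `_mi_stats_main`) and lets `_mi_prim_process_info` overwrite whatever the platform can actually report. A small usage sketch of the public API this feeds — a standalone test program, not part of the patch:

```
#include <stdio.h>
#include "mimalloc.h"

int main(void) {
  size_t elapsed_ms, user_ms, sys_ms, current_rss, peak_rss;
  size_t current_commit, peak_commit, page_faults;
  // Any out-parameter may be passed as NULL if the caller does not need it.
  mi_process_info(&elapsed_ms, &user_ms, &sys_ms, &current_rss, &peak_rss,
                  &current_commit, &peak_commit, &page_faults);
  printf("elapsed: %zu ms, user: %zu ms, system: %zu ms\n", elapsed_ms, user_ms, sys_ms);
  printf("peak rss: %zu bytes, peak commit: %zu bytes, page faults: %zu\n",
         peak_rss, peak_commit, page_faults);
  return 0;
}
```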
"commit" : "decommit", err, err, start, csize); } mi_assert_internal(err == 0); return (err == 0); @@ -412,7 +419,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) int err = _mi_prim_reset(start, csize); if (err != 0) { - _mi_warning_message("madvise reset error: start: %p, csize: 0x%zx, errno: %i\n", start, csize, err); + _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } return (err == 0); } @@ -448,7 +455,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { */ int err = _mi_prim_protect(start,csize,protect); if (err != 0) { - _mi_warning_message("mprotect error: start: %p, csize: 0x%zx, err: %i\n", start, csize, err); + _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize); } return (err == 0); } @@ -523,13 +530,17 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse for (page = 0; page < pages; page++) { // allocate a page void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); - void* p = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); + void* p = NULL; + int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &p); + if (err != 0) { + _mi_warning_message("unable to allocate huge OS page (error: %d (0x%d), address: %p, size: %zx bytes)", err, err, addr, MI_HUGE_OS_PAGE_SIZE); + } // Did we succeed at a contiguous address? if (p != addr) { // no success, issue a warning and break if (p != NULL) { - _mi_warning_message("could not allocate contiguous huge page %zu at %p\n", page, addr); + _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr); _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); } break; diff --git a/src/prim/prim.h b/src/prim/prim.h index 3130d489..1a4fb5d8 100644 --- a/src/prim/prim.h +++ b/src/prim/prim.h @@ -11,6 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file // note: on all primitive functions, we always get: // addr != NULL and page aligned // size > 0 and page aligned +// return value is an error code an int where 0 is success. // OS memory configuration typedef struct mi_os_mem_config_s { @@ -25,13 +26,13 @@ typedef struct mi_os_mem_config_s { void _mi_prim_mem_init( mi_os_mem_config_t* config ); // Free OS memory -void _mi_prim_free(void* addr, size_t size ); +int _mi_prim_free(void* addr, size_t size ); // Allocate OS memory. Return NULL on error. // The `try_alignment` is just a hint and the returned pointer does not have to be aligned. // pre: !commit => !allow_large // try_alignment >= _mi_os_page_size() and a power of 2 -void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large); +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr); // Commit memory. Returns error code or 0 on success. int _mi_prim_commit(void* addr, size_t size, bool commit); @@ -47,7 +48,7 @@ int _mi_prim_protect(void* addr, size_t size, bool protect); // pre: size > 0 and a multiple of 1GiB. // addr is either NULL or an address hint. // numa_node is either negative (don't care), or a numa node number. 
-void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node); +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr); // Return the current NUMA node size_t _mi_prim_numa_node(void); diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 1040c791..5a3ca5ab 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -96,11 +96,9 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { // free //--------------------------------------------- -void _mi_prim_free(void* addr, size_t size ) { +int _mi_prim_free(void* addr, size_t size ) { bool err = (munmap(addr, size) == -1); - if (err) { - _mi_warning_message("unable to release OS memory: %s, addr: %p, size: %zu\n", strerror(errno), addr, size); - } + return (err ? errno : 0); } @@ -118,19 +116,24 @@ static int unix_madvise(void* addr, size_t size, int advice) { static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { MI_UNUSED(try_alignment); + void* p = NULL; #if defined(MAP_ALIGNED) // BSD if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { size_t n = mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= MAP_ALIGNED(n); - void* p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + int err = errno; + _mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%d), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + } if (p!=MAP_FAILED) return p; - // fall back to regular mmap + // fall back to regular mmap } } #elif defined(MAP_ALIGN) // Solaris if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - void* p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment + p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment if (p!=MAP_FAILED) return p; // fall back to regular mmap } @@ -140,14 +143,18 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p if (addr == NULL) { void* hint = _mi_os_get_aligned_hint(try_alignment, size); if (hint != NULL) { - void* p = mmap(hint, size, protect_flags, flags, fd, 0); + p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + int err = errno; + _mi_warning_message("unable to directly request hinted aligned OS memory (error: %d (0x%d), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + } if (p!=MAP_FAILED) return p; - // fall back to regular mmap + // fall back to regular mmap } } #endif // regular mmap - void* p = mmap(addr, size, protect_flags, flags, fd, 0); + p = mmap(addr, size, protect_flags, flags, fd, 0); if (p!=MAP_FAILED) return p; // failed to allocate return NULL; @@ -217,7 +224,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { mi_huge_pages_available = false; // don't try huge 1GiB pages again - _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); + _mi_warning_message("unable 
to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); } @@ -258,20 +265,18 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec #endif } } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large); - } return p; } // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. -void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - return unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + return (*addr != NULL ? 0 : errno); } @@ -379,28 +384,29 @@ static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, co } #endif -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) { +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { bool is_large = true; - void* p = unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); - if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes unsigned long numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? // see: - long err = mi_prim_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { - _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d: %s\n", numa_node, strerror(errno)); - } + err = errno; + _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%d))\n", numa_node, err, err); + } } - return p; + return (*addr != NULL ? 
0 : errno); } #else -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) { - MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); - return NULL; +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *addr = NULL; + return ENOMEM; } #endif diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index 89c04d78..f995304f 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -27,9 +27,10 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { // Free //--------------------------------------------- -void _mi_prim_free(void* addr, size_t size ) { +int _mi_prim_free(void* addr, size_t size ) { MI_UNUSED(addr); MI_UNUSED(size); // wasi heap cannot be shrunk + return 0; } @@ -101,20 +102,23 @@ static void* mi_prim_mem_grow(size_t size, size_t try_alignment) { } } } + /* if (p == NULL) { _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); errno = ENOMEM; return NULL; } - mi_assert_internal( try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); + */ + mi_assert_internal( p == NULL || try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); return p; } // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. -void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { MI_UNUSED(allow_large); MI_UNUSED(commit); *is_large = false; - return mi_prim_mem_grow(size, try_alignment); + *addr = mi_prim_mem_grow(size, try_alignment); + return (*addr != NULL ? 
0 : ENOMEM); } @@ -142,9 +146,10 @@ int _mi_prim_protect(void* addr, size_t size, bool protect) { // Huge pages and NUMA nodes //--------------------------------------------- -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) { - MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); - return NULL; +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *addr = NULL; + return ENOSYS; } size_t _mi_prim_numa_node(void) { diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 1ce44a10..1e15273a 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -156,7 +156,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) // Free //--------------------------------------------- -void _mi_prim_free(void* addr, size_t size ) { +int _mi_prim_free(void* addr, size_t size ) { DWORD errcode = 0; bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); if (err) { errcode = GetLastError(); } @@ -172,9 +172,7 @@ void _mi_prim_free(void* addr, size_t size ) { if (err) { errcode = GetLastError(); } } } - if (errcode != 0) { - _mi_warning_message("unable to release OS memory: error code 0x%x, addr: %p, size: %zu\n", errcode, addr, size); - } + return (int)errcode; } @@ -240,19 +238,18 @@ static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DW *is_large = ((flags&MEM_LARGE_PAGES) != 0); p = win_virtual_alloc_prim(addr, size, try_alignment, flags); } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); - } + //if (p == NULL) { _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); } return p; } -void* _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); int flags = MEM_RESERVE; if (commit) { flags |= MEM_COMMIT; } - return win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + return (*addr != NULL ? 
0 : (int)GetLastError()); } @@ -296,7 +293,7 @@ int _mi_prim_protect(void* addr, size_t size, bool protect) { // Huge page allocation //--------------------------------------------- -void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) +static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int numa_node) { const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; @@ -315,7 +312,7 @@ void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) params[1].Arg.ULong = (unsigned)numa_node; } SIZE_T psize = size; - void* base = addr; + void* base = hint_addr; NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); if (err == 0 && base != NULL) { return base; @@ -330,11 +327,16 @@ void* _mi_prim_alloc_huge_os_pages(void* addr, size_t size, int numa_node) if (pVirtualAlloc2 != NULL && numa_node >= 0) { params[0].Type.Type = MiMemExtendedParameterNumaNode; params[0].Arg.ULong = (unsigned)numa_node; - return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); + return (*pVirtualAlloc2)(GetCurrentProcess(), hint_addr, size, flags, PAGE_READWRITE, params, 1); } // otherwise use regular virtual alloc on older windows - return VirtualAlloc(addr, size, flags, PAGE_READWRITE); + return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE); +} + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + *addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node); + return (*addr != NULL ? 0 : (int)GetLastError()); } From f58357548c9a2b377112174698e400c028cf1910 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 10:37:39 -0700 Subject: [PATCH 31/42] restructure header files --- ide/vs2022/mimalloc-override.vcxproj | 9 +++++---- ide/vs2022/mimalloc.vcxproj | 9 +++++---- include/{mimalloc-atomic.h => mimalloc/atomic.h} | 0 include/{mimalloc-internal.h => mimalloc/internal.h} | 4 ++-- {src/prim => include/mimalloc}/prim.h | 0 include/{mimalloc-track.h => mimalloc/track.h} | 0 include/{mimalloc-types.h => mimalloc/types.h} | 2 +- src/alloc-aligned.c | 4 ++-- src/alloc-override-osx.c | 2 +- src/alloc-posix.c | 2 +- src/alloc.c | 8 ++++---- src/arena.c | 4 ++-- src/bitmap.c | 2 +- src/heap.c | 7 +++---- src/init.c | 4 ++-- src/options.c | 6 +++--- src/os.c | 6 +++--- src/page.c | 4 ++-- src/prim/unix/prim.c | 6 +++--- src/prim/wasi/prim.c | 6 +++--- src/prim/windows/prim.c | 7 ++++--- src/random.c | 4 ++-- src/region.c | 4 ++-- src/segment.c | 4 ++-- src/static.c | 2 +- src/stats.c | 6 +++--- test/test-api-fill.c | 2 +- test/test-api.c | 4 ++-- 28 files changed, 60 insertions(+), 58 deletions(-) rename include/{mimalloc-atomic.h => mimalloc/atomic.h} (100%) rename include/{mimalloc-internal.h => mimalloc/internal.h} (99%) rename {src/prim => include/mimalloc}/prim.h (100%) rename include/{mimalloc-track.h => mimalloc/track.h} (100%) rename include/{mimalloc-types.h => mimalloc/types.h} (99%) diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 1eb72952..6eac2fe0 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -209,15 +209,16 @@ - - - - + + + + + diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 9e0ffc85..bf8f7d50 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -241,14 +241,15 @@ - - - - + + + + + diff --git a/include/mimalloc-atomic.h 
b/include/mimalloc/atomic.h similarity index 100% rename from include/mimalloc-atomic.h rename to include/mimalloc/atomic.h diff --git a/include/mimalloc-internal.h b/include/mimalloc/internal.h similarity index 99% rename from include/mimalloc-internal.h rename to include/mimalloc/internal.h index 33aafa70..51d8aa1e 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc/internal.h @@ -8,8 +8,8 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_INTERNAL_H #define MIMALLOC_INTERNAL_H -#include "mimalloc-types.h" -#include "mimalloc-track.h" +#include "mimalloc/types.h" +#include "mimalloc/track.h" #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) diff --git a/src/prim/prim.h b/include/mimalloc/prim.h similarity index 100% rename from src/prim/prim.h rename to include/mimalloc/prim.h diff --git a/include/mimalloc-track.h b/include/mimalloc/track.h similarity index 100% rename from include/mimalloc-track.h rename to include/mimalloc/track.h diff --git a/include/mimalloc-types.h b/include/mimalloc/types.h similarity index 99% rename from include/mimalloc-types.h rename to include/mimalloc/types.h index 3577b23a..da4b423f 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc/types.h @@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file #include // ptrdiff_t #include // uintptr_t, uint16_t, etc -#include "mimalloc-atomic.h" // _Atomic +#include "mimalloc/atomic.h" // _Atomic #ifdef _MSC_VER #pragma warning(disable:4214) // bitfield is not int diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index aa9b7bd0..674c74fe 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -6,8 +6,8 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "prim/prim.h" // mi_prim_get_default_heap +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // mi_prim_get_default_heap #include // memset diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c index a2819a8b..a517ddea 100644 --- a/src/alloc-override-osx.c +++ b/src/alloc-override-osx.c @@ -6,7 +6,7 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" #if defined(MI_MALLOC_OVERRIDE) diff --git a/src/alloc-posix.c b/src/alloc-posix.c index f0cfe629..b6f09d1a 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file // for convenience and used when overriding these functions. // ------------------------------------------------------------------------ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" // ------------------------------------------------------ // Posix & Unix functions definitions diff --git a/src/alloc.c b/src/alloc.c index 0bda4db8..301166eb 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -9,9 +9,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" -#include "prim/prim.h" // _mi_prim_thread_id() +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // _mi_prim_thread_id() #include // memset, strlen (for mi_strdup) #include // malloc, abort @@ -40,7 +40,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz // allow use of the block internally // note: when tracking we need to avoid ever touching the MI_PADDING since - // that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc-track.h`) + // that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc/track.h`) mi_track_mem_undefined(block, mi_page_usable_block_size(page)); // zero the block? note: we need to zero the full block size (issue #63) diff --git a/src/arena.c b/src/arena.c index 57f48a73..d8178cc1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -21,8 +21,8 @@ which is sometimes needed for embedded devices or shared memory for example. The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" #include // memset #include // ENOMEM diff --git a/src/bitmap.c b/src/bitmap.c index 9ba994d7..8483de0b 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -18,7 +18,7 @@ between the fields. (This is used in arena allocation) ---------------------------------------------------------------------------- */ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" #include "bitmap.h" /* ----------------------------------------------------------- diff --git a/src/heap.c b/src/heap.c index b12a9962..fea033dc 100644 --- a/src/heap.c +++ b/src/heap.c @@ -6,10 +6,9 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" -#include "mimalloc-track.h" -#include "prim/prim.h" // mi_prim_get_default_heap +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // mi_prim_get_default_heap #include // memset, memcpy diff --git a/src/init.c b/src/init.c index 495d26fd..8c5c8049 100644 --- a/src/init.c +++ b/src/init.c @@ -5,8 +5,8 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "prim/prim.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" #include // memcpy, memset #include // atexit diff --git a/src/options.c b/src/options.c index f0c52c84..816a2919 100644 --- a/src/options.c +++ b/src/options.c @@ -5,9 +5,9 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
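For quick reference while reading the renames and include fixes in this patch (the individual diffs continue below), the internal headers move under a `mimalloc/` subdirectory; a typical internal translation unit now starts like this (illustrative, paths as renamed above):

```
#include "mimalloc.h"            // public API, unchanged location
#include "mimalloc/types.h"      // was "mimalloc-types.h"
#include "mimalloc/internal.h"   // was "mimalloc-internal.h"
#include "mimalloc/atomic.h"     // was "mimalloc-atomic.h"
#include "mimalloc/track.h"      // was "mimalloc-track.h"
#include "mimalloc/prim.h"       // was "src/prim/prim.h"
```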
-----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" -#include "prim/prim.h" // mi_prim_out_stderr +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // mi_prim_out_stderr #include // FILE #include // abort diff --git a/src/os.c b/src/os.c index 85c3652f..d5a3398f 100644 --- a/src/os.c +++ b/src/os.c @@ -5,9 +5,9 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" -#include "prim/prim.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" /* ----------------------------------------------------------- diff --git a/src/page.c b/src/page.c index 1347c845..e43ba402 100644 --- a/src/page.c +++ b/src/page.c @@ -12,8 +12,8 @@ terms of the MIT license. A copy of the license can be found in the file ----------------------------------------------------------- */ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" /* ----------------------------------------------------------- Definition of page queues for each block size diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 5a3ca5ab..0ac69f1a 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -21,9 +21,9 @@ terms of the MIT license. A copy of the license can be found in the file #endif #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" -#include "../prim.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" #include // mmap #include // sysconf diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index f995304f..cb3ce1a7 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -8,9 +8,9 @@ terms of the MIT license. A copy of the license can be found in the file // This file is included in `src/prim/prim.c` #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" -#include "../prim.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" //--------------------------------------------- // Initialize diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 1e15273a..bea1d437 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -8,9 +8,9 @@ terms of the MIT license. A copy of the license can be found in the file // This file is included in `src/prim/prim.c` #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" -#include "../prim.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" #include // strerror #include // fputs, stderr @@ -157,6 +157,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) //--------------------------------------------- int _mi_prim_free(void* addr, size_t size ) { + MI_UNUSED(size); DWORD errcode = 0; bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); if (err) { errcode = GetLastError(); } diff --git a/src/random.c b/src/random.c index 3c8372c8..4fc8b2f8 100644 --- a/src/random.c +++ b/src/random.c @@ -5,8 +5,8 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
-----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "prim/prim.h" // _mi_prim_random_buf +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // _mi_prim_random_buf #include // memset /* ---------------------------------------------------------------------------- diff --git a/src/region.c b/src/region.c index 3571abb6..29681f4c 100644 --- a/src/region.c +++ b/src/region.c @@ -32,8 +32,8 @@ Possible issues: do this better without adding too much complexity? -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" #include // memset diff --git a/src/segment.c b/src/segment.c index c3cb7155..6cdf4fe7 100644 --- a/src/segment.c +++ b/src/segment.c @@ -5,8 +5,8 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" #include // memset #include diff --git a/src/static.c b/src/static.c index 0de72fe3..d0652526 100644 --- a/src/static.c +++ b/src/static.c @@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file #endif #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" // For a static override we create a single object file // containing the whole library. If it is linked first diff --git a/src/stats.c b/src/stats.c index 8273740f..435a2b38 100644 --- a/src/stats.c +++ b/src/stats.c @@ -5,9 +5,9 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" -#include "prim/prim.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" #include // snprintf #include // memset diff --git a/test/test-api-fill.c b/test/test-api-fill.c index 2ad06808..7ba79880 100644 --- a/test/test-api-fill.c +++ b/test/test-api-fill.c @@ -5,7 +5,7 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-types.h" +#include "mimalloc/types.h" #include "testhelper.h" diff --git a/test/test-api.c b/test/test-api.c index 20050ce8..c78e1972 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -33,8 +33,8 @@ we therefore test the API over various inputs. 
Please add more tests :-) #endif #include "mimalloc.h" -// #include "mimalloc-internal.h" -#include "mimalloc-types.h" // for MI_DEBUG and MI_ALIGNMENT_MAX +// #include "mimalloc/internal.h" +#include "mimalloc/types.h" // for MI_DEBUG and MI_ALIGNMENT_MAX #include "testhelper.h" From c0c762611c309e86e31a04778059101fa6536bb0 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 10:49:56 -0700 Subject: [PATCH 32/42] add prim/osx directory --- CMakeLists.txt | 20 +++++++++---------- .../osx/alloc-override-zone.c} | 0 src/prim/osx/prim.c | 9 +++++++++ src/prim/prim.c | 6 ++++++ src/static.c | 14 ++++++------- 5 files changed, 32 insertions(+), 17 deletions(-) rename src/{alloc-override-osx.c => prim/osx/alloc-override-zone.c} (100%) create mode 100644 src/prim/osx/prim.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 68b7cab4..5c8e55f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,20 +38,20 @@ include(GNUInstallDirs) include("cmake/mimalloc-config-version.cmake") set(mi_sources - src/stats.c - src/random.c - src/os.c - src/bitmap.c - src/arena.c - src/region.c - src/segment.c - src/page.c src/alloc.c src/alloc-aligned.c src/alloc-posix.c + src/arena.c + src/bitmap.c src/heap.c - src/options.c src/init.c + src/options.c + src/os.c + src/page.c + src/random.c + src/region.c + src/segment.c + src/stats.c src/prim/prim.c) set(mi_cflags "") @@ -92,7 +92,7 @@ if(MI_OVERRIDE) if(MI_OSX_ZONE) # use zone's on macOS message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)") - list(APPEND mi_sources src/alloc-override-osx.c) + list(APPEND mi_sources src/prim/osx/alloc-override-zone.c) list(APPEND mi_defines MI_OSX_ZONE=1) if (NOT MI_OSX_INTERPOSE) message(STATUS " WARNING: zone overriding usually also needs interpose (use -DMI_OSX_INTERPOSE=ON)") diff --git a/src/alloc-override-osx.c b/src/prim/osx/alloc-override-zone.c similarity index 100% rename from src/alloc-override-osx.c rename to src/prim/osx/alloc-override-zone.c diff --git a/src/prim/osx/prim.c b/src/prim/osx/prim.c new file mode 100644 index 00000000..8a2f4e8a --- /dev/null +++ b/src/prim/osx/prim.c @@ -0,0 +1,9 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// We use the unix/prim.c with the mmap API on macOSX +#include "../unix/prim.c" diff --git a/src/prim/prim.c b/src/prim/prim.c index 109ab8e8..9a597d8e 100644 --- a/src/prim/prim.c +++ b/src/prim/prim.c @@ -10,9 +10,15 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(_WIN32) #include "windows/prim.c" // VirtualAlloc (Windows) + +#elif defined(__APPLE__) +#include "osx/prim.c" // macOSX (actually defers to mmap in unix/prim.c) + #elif defined(__wasi__) #define MI_USE_SBRK #include "wasi/prim.c" // memory-grow or sbrk (Wasm) + #else #include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) + #endif diff --git a/src/static.c b/src/static.c index d0652526..090f0c25 100644 --- a/src/static.c +++ b/src/static.c @@ -20,11 +20,8 @@ terms of the MIT license. A copy of the license can be found in the file // containing the whole library. 
If it is linked first // it will override all the standard library allocation // functions (on Unix's). -#include "alloc.c" +#include "alloc.c" // includes alloc-override.c #include "alloc-aligned.c" -#if MI_OSX_ZONE -#include "alloc-override-osx.c" -#endif #include "alloc-posix.c" #include "arena.c" #include "bitmap.c" @@ -32,9 +29,12 @@ terms of the MIT license. A copy of the license can be found in the file #include "init.c" #include "options.c" #include "os.c" -#include "page.c" -#include "prim/prim.c" -#include "random.c" +#include "page.c" // includes page-queue.c +#include "random.c" #include "region.c" #include "segment.c" #include "stats.c" +#include "prim/prim.c" +#if MI_OSX_ZONE +#include "prim/osx/alloc-override-zone.c" +#endif From ec5f4904b0762639fe938e1d906527f763e4df77 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 11:01:40 -0700 Subject: [PATCH 33/42] more comments --- include/mimalloc/internal.h | 6 ++++++ include/mimalloc/prim.h | 7 +++++++ include/mimalloc/types.h | 11 +++++++++++ 3 files changed, 24 insertions(+) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 51d8aa1e..791eeb99 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -8,6 +8,12 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_INTERNAL_H #define MIMALLOC_INTERNAL_H + +// -------------------------------------------------------------------------- +// This file contains the interal API's of mimalloc and various utility +// functions and macros. +// -------------------------------------------------------------------------- + #include "mimalloc/types.h" #include "mimalloc/track.h" diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 1a4fb5d8..97d8b45d 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -8,10 +8,17 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_PRIM_H #define MIMALLOC_PRIM_H + +// -------------------------------------------------------------------------- +// This file specifies the primitive portability API. +// Each OS/host needs to implement these primitives, see `src/prim` +// for implementations on Window, macOS, WASI, and Linux/Unix. +// // note: on all primitive functions, we always get: // addr != NULL and page aligned // size > 0 and page aligned // return value is an error code an int where 0 is success. +// -------------------------------------------------------------------------- // OS memory configuration typedef struct mi_os_mem_config_s { diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index da4b423f..90b7b3e4 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -8,6 +8,17 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_TYPES_H #define MIMALLOC_TYPES_H +// -------------------------------------------------------------------------- +// This file contains the main type definitions for mimalloc: +// mi_heap_t : all data for a thread-local heap, contains +// lists of all managed heap pages. +// mi_segment_t : a larger chunk of memory (32GiB) from where pages +// are allocated. +// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from +// where objects are allocated. 
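The new `prim.h` comment above states that every OS/host implements the portability primitives and returns an `int` error code where 0 is success. A hedged skeleton of what a port for a hypothetical platform could look like — the signatures follow this series, but the `<newos>` directory and the stub bodies are invented for illustration and do not form a working backend:

```
// src/prim/<newos>/prim.c -- illustrative stubs only (hypothetical platform).
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/prim.h"
#include <errno.h>

int _mi_prim_free(void* addr, size_t size) {
  MI_UNUSED(addr); MI_UNUSED(size);
  return 0;                        // nothing to release on this host
}

// The allocation result is returned through `addr`; the return value is an
// errno-style error code (0 on success).
int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit,
                   bool allow_large, bool* is_large, void** addr) {
  MI_UNUSED(size); MI_UNUSED(try_alignment); MI_UNUSED(commit); MI_UNUSED(allow_large);
  *is_large = false;
  *addr = NULL;                    // no OS allocator wired up in this sketch
  return ENOMEM;
}

int _mi_prim_commit(void* addr, size_t size, bool commit) {
  MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(commit);
  return 0;                        // pretend commit/decommit always succeeds
}

int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) {
  MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node);
  *addr = NULL;
  return ENOSYS;                   // huge OS pages not supported
}
```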
+// -------------------------------------------------------------------------- + + #include // ptrdiff_t #include // uintptr_t, uint16_t, etc #include "mimalloc/atomic.h" // _Atomic From 0509d11ac55f28ddd4784fcd1a3f5e42238bbd41 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 11:02:16 -0700 Subject: [PATCH 34/42] more comments --- include/mimalloc/internal.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 791eeb99..3ee20151 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -50,6 +50,7 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_externc #endif +// pthreads #if !defined(_WIN32) && !defined(__wasi__) #define MI_USE_PTHREADS #include From e24e1125eea4102542f0afcabbe58b01acfd5e7a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 11:03:50 -0700 Subject: [PATCH 35/42] bump version to 1.8.0 --- cmake/mimalloc-config-version.cmake | 4 ++-- include/mimalloc.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index dbe8fdaa..1141128c 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 1) -set(mi_version_minor 7) -set(mi_version_patch 9) +set(mi_version_minor 8) +set(mi_version_patch 0) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 4fc7a752..66545a8c 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 179 // major + 2 digits minor +#define MI_MALLOC_VERSION 180 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From 2f9b2f51b9869d3f11a7fbbfdb9fb69ead33c0f6 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 11:11:01 -0700 Subject: [PATCH 36/42] update 2022 ide --- ide/vs2022/mimalloc.vcxproj | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index bf8f7d50..e6474d0f 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -205,12 +205,6 @@ false false - - true - true - true - true - true true From 287010578d9b004c54c879cf12a735ccb1b4fb64 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Mon, 20 Mar 2023 11:20:30 -0700 Subject: [PATCH 37/42] update ide project files --- ide/vs2017/mimalloc-override.vcxproj | 8 ++++-- ide/vs2017/mimalloc-override.vcxproj.filters | 24 ++++++++++------ ide/vs2017/mimalloc.vcxproj | 14 ++++----- ide/vs2017/mimalloc.vcxproj.filters | 27 ++++++++++-------- ide/vs2019/mimalloc-override.vcxproj | 8 ++++-- ide/vs2019/mimalloc-override.vcxproj.filters | 28 +++++++++++------- ide/vs2019/mimalloc.vcxproj | 20 +++++++------ ide/vs2019/mimalloc.vcxproj.filters | 30 ++++++++++++-------- ide/vs2022/mimalloc-override.vcxproj | 6 ++++ ide/vs2022/mimalloc.vcxproj | 6 ++++ src/prim/windows/prim.c | 4 --- 11 files changed, 104 insertions(+), 71 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index f308225b..0d11068b 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -209,12 +209,14 @@ - - - + + + + + diff --git 
a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index a67dddad..009962dd 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -14,15 +14,6 @@ Header Files - - Header Files - - - Header Files - - - Header Files - Header Files @@ -32,6 +23,21 @@ Header Files + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index e7be785c..05024448 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -215,12 +215,6 @@ false false - - true - true - true - true - true true @@ -249,12 +243,14 @@ - - - + + + + + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 27cd4b5e..249757b6 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -35,9 +35,6 @@ Source Files - - Source Files - Source Files @@ -70,23 +67,29 @@ Header Files - - Header Files - - - Header Files - Header Files - - Header Files - Header Files Header Files + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + \ No newline at end of file diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 5fb809c8..d80133e7 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -209,12 +209,14 @@ - - - + + + + + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index 737e0600..357a9a2f 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -49,30 +49,38 @@ Source Files - + + Source Files + Header Files - - Header Files - - - Header Files - Header Files Header Files - - Header Files - Source Files + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index b18674ad..79146c99 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -205,12 +205,6 @@ false false - - true - true - true - true - true true @@ -226,6 +220,12 @@ + + true + true + true + true + @@ -241,12 +241,14 @@ - - - + + + + + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index 5ab88914..9b215312 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -10,9 +10,6 @@ Source Files - - Source Files - Source Files @@ -55,29 +52,38 @@ Source Files + + Source Files + Header Files - - Header Files - - - Header Files - Header Files Header Files - - Header Files - Source Files + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index 6eac2fe0..50a3d6b9 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -241,6 +241,12 @@ + + true + true + true + true + diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index e6474d0f..9a7bf18c 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -220,6 +220,12 @@ + + true + true + true + true + diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index bea1d437..e3dc33e3 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -11,12 +11,8 @@ terms of the MIT license. 
A copy of the license can be found in the file
 #include "mimalloc/internal.h"
 #include "mimalloc/atomic.h"
 #include "mimalloc/prim.h"
-#include <string.h>  // strerror
 #include <stdio.h>   // fputs, stderr
 
-#ifdef _MSC_VER
-#pragma warning(disable:4996)  // strerror
-#endif
 
 //---------------------------------------------
 // Dynamically bind Windows API points for portability

From 65402836aeb4363290cd7c66f6818ef9b5efd5ae Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Mon, 20 Mar 2023 11:30:52 -0700
Subject: [PATCH 38/42] comments

---
 include/mimalloc/atomic.h   | 2 +-
 include/mimalloc/internal.h | 2 +-
 include/mimalloc/types.h    | 2 +-
 src/prim/readme.md          | 5 +++--
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h
index c66f8049..971b374a 100644
--- a/include/mimalloc/atomic.h
+++ b/include/mimalloc/atomic.h
@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2018-2021 Microsoft Research, Daan Leijen
+Copyright (c) 2018-2023 Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 3ee20151..32c71ff9 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2018-2022, Microsoft Research, Daan Leijen
+Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index 90b7b3e4..28343d21 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
+Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
diff --git a/src/prim/readme.md b/src/prim/readme.md
index eb02f274..380dd3a7 100644
--- a/src/prim/readme.md
+++ b/src/prim/readme.md
@@ -2,7 +2,8 @@
 
 This is the portability layer where all primitives needed from the OS are defined.
 
-- `prim.h`: API definition
-- `prim.c`: Selects one of `unix/prim.c`, `wasi/prim.c`, or `windows/prim.c` depending on the host platform.
+- `include/mimalloc/prim.h`: primitive portability API definition.
+- `prim.c`: Selects one of `unix/prim.c`, `wasi/prim.c`, or `windows/prim.c` depending on the host platform
+  (and on macOS, `osx/prim.c` defers to `unix/prim.c`).
 
 Note: still work in progress, there may still be places in the sources that still depend on OS ifdef's.
\ No newline at end of file

From 54ad5e76fd07a7381ca7dd92a646cc98ee1dea8a Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Mon, 20 Mar 2023 11:43:31 -0700
Subject: [PATCH 39/42] fix warnings for issue #709

---
 src/heap.c  | 13 ++++++++++++-
 src/page.c  |  4 ++++
 src/stats.c | 18 ++++++++++--------
 3 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/src/heap.c b/src/heap.c
index fea033dc..0c372c5b 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -31,15 +31,18 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void
   // visit all pages
   #if MI_DEBUG>1
   size_t total = heap->page_count;
-  #endif
   size_t count = 0;
+  #endif
+
   for (size_t i = 0; i <= MI_BIN_FULL; i++) {
     mi_page_queue_t* pq = &heap->pages[i];
     mi_page_t* page = pq->first;
     while(page != NULL) {
       mi_page_t* next = page->next; // save next in case the page gets removed from the queue
       mi_assert_internal(mi_page_heap(page) == heap);
+      #if MI_DEBUG>1
       count++;
+      #endif
       if (!fn(heap, pq, page, arg1, arg2)) return false;
       page = next; // and continue
     }
@@ -516,9 +519,13 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
   uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)];
   memset(free_map, 0, sizeof(free_map));
 
+  #if MI_DEBUG>1
   size_t free_count = 0;
+  #endif
   for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) {
+    #if MI_DEBUG>1
     free_count++;
+    #endif
     mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize));
     size_t offset = (uint8_t*)block - pstart;
     mi_assert_internal(offset % bsize == 0);
@@ -531,7 +538,9 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
   mi_assert_internal(page->capacity == (free_count + page->used));
 
   // walk through all blocks skipping the free ones
+  #if MI_DEBUG>1
   size_t used_count = 0;
+  #endif
   for (size_t i = 0; i < page->capacity; i++) {
     size_t bitidx = (i / sizeof(uintptr_t));
     size_t bit = i - (bitidx * sizeof(uintptr_t));
@@ -540,7 +549,9 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
       i += (sizeof(uintptr_t) - 1); // skip a run of free blocks
     }
     else if ((m & ((uintptr_t)1 << bit)) == 0) {
+      #if MI_DEBUG>1
       used_count++;
+      #endif
       uint8_t* block = pstart + (i * bsize);
       if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false;
     }
diff --git a/src/page.c b/src/page.c
index e43ba402..531293f3 100644
--- a/src/page.c
+++ b/src/page.c
@@ -699,12 +699,16 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
 static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
 {
   // search through the pages in "next fit" order
+  #if MI_STAT
   size_t count = 0;
+  #endif
   mi_page_t* page = pq->first;
   while (page != NULL)
   {
     mi_page_t* next = page->next; // remember next
+    #if MI_STAT
     count++;
+    #endif
 
     // 0. collect freed blocks by us and other threads
     _mi_page_free_collect(page, false);
diff --git a/src/stats.c b/src/stats.c
index 435a2b38..0ab2acd2 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -430,17 +430,19 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) {
 
 mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept
 {
-  mi_process_info_t pinfo = { 0 };
-  pinfo.elapsed = _mi_clock_end(mi_process_start);
-  pinfo.utime = 0;
-  pinfo.stime = 0;
+  mi_process_info_t pinfo;
+  _mi_memzero(&pinfo,sizeof(pinfo));
+  pinfo.elapsed        = _mi_clock_end(mi_process_start);
   pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));
-  pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
-  pinfo.current_rss = pinfo.current_commit;
-  pinfo.peak_rss = pinfo.peak_commit;
-  pinfo.page_faults = 0;
+  pinfo.peak_commit    = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
+  pinfo.current_rss    = pinfo.current_commit;
+  pinfo.peak_rss       = pinfo.peak_commit;
+  pinfo.utime          = 0;
+  pinfo.stime          = 0;
+  pinfo.page_faults    = 0;
   _mi_prim_process_info(&pinfo);
+
   if (elapsed_msecs!=NULL) *elapsed_msecs = (pinfo.elapsed < 0 ? 0 : (pinfo.elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.elapsed : PTRDIFF_MAX));
   if (user_msecs!=NULL)    *user_msecs    = (pinfo.utime < 0 ? 0 : (pinfo.utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.utime : PTRDIFF_MAX));
   if (system_msecs!=NULL)  *system_msecs  = (pinfo.stime < 0 ? 0 : (pinfo.stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.stime : PTRDIFF_MAX));

From 30df80b05ad30d6d66b85c6d100ab9312bfef3ec Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Mon, 20 Mar 2023 11:50:05 -0700
Subject: [PATCH 40/42] proper prototype

---
 src/prim/osx/alloc-override-zone.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/prim/osx/alloc-override-zone.c b/src/prim/osx/alloc-override-zone.c
index a517ddea..80bcfa93 100644
--- a/src/prim/osx/alloc-override-zone.c
+++ b/src/prim/osx/alloc-override-zone.c
@@ -420,7 +420,7 @@ __attribute__((constructor(0)))
 #else
 __attribute__((constructor))      // seems not supported by g++-11 on the M1
 #endif
-static void _mi_macos_override_malloc() {
+static void _mi_macos_override_malloc(void) {
   malloc_zone_t* purgeable_zone = NULL;
 
   #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6)

From 0b4c3da2e90c90dc3f379618195e2e9388c9201c Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Mon, 20 Mar 2023 13:55:39 -0700
Subject: [PATCH 41/42] make process init race free (issue #701)

---
 include/mimalloc/atomic.h | 9 +++++++++
 src/init.c                | 3 ++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h
index 971b374a..fe79fbca 100644
--- a/include/mimalloc/atomic.h
+++ b/include/mimalloc/atomic.h
@@ -275,6 +275,15 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
   return (intptr_t)mi_atomic_addi(p, -sub);
 }
 
+typedef _Atomic(uintptr_t) mi_atomic_once_t;
+
+// Returns true only on the first invocation
+static inline bool mi_atomic_once( mi_atomic_once_t* once ) {
+  if (mi_atomic_load_relaxed(once) != 0) return false;      // quick test
+  uintptr_t expected = 0;
+  return mi_atomic_cas_strong_acq_rel(once, &expected, 1);  // try to set to 1
+}
+
 // Yield
 #if defined(__cplusplus)
 #include <thread>
diff --git a/src/init.c b/src/init.c
index 8c5c8049..9a11f6e0 100644
--- a/src/init.c
+++ b/src/init.c
@@ -511,7 +511,8 @@ static void mi_detect_cpu_features(void) {
 // Initialize the process; called by thread_init or the process loader
 void mi_process_init(void) mi_attr_noexcept {
   // ensure we are called once
-  if (_mi_process_is_initialized) return;
+  static mi_atomic_once_t process_init;
+  if (!mi_atomic_once(&process_init)) return;
   _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
   _mi_process_is_initialized = true;
   mi_process_setup_auto_thread_done();

From c92e9e7bf76add4b7de35de8ac243dc144b92e56 Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Mon, 20 Mar 2023 14:01:09 -0700
Subject: [PATCH 42/42] add comment that thread ids should not be zero, issue #698

---
 include/mimalloc/prim.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h
index 97d8b45d..68f0871e 100644
--- a/include/mimalloc/prim.h
+++ b/include/mimalloc/prim.h
@@ -104,12 +104,13 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
 
 //-------------------------------------------------------------------
-// Thread id
+// Thread id: `_mi_prim_thread_id()`
 //
 // Getting the thread id should be performant as it is called in the
 // fast path of `_mi_free` and we specialize for various platforms as
 // inlined definitions. Regular code should call `init.c:_mi_thread_id()`.
-// We only require _mi_prim_thread_id() to return a unique id for each thread.
+// We only require _mi_prim_thread_id() to return a unique id
+// for each thread (unequal to zero).
 //-------------------------------------------------------------------
 
 static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
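
The once-guard added in PATCH 41 follows a common two-step pattern: a relaxed load as a cheap
fast path, then a single strong compare-and-swap so that exactly one caller (the one that flips
0 to 1) gets true. The standalone sketch below mirrors that logic with plain C11 atomics so it
can be compiled and run on its own; the demo_* names are invented for illustration and are not
mimalloc API, and the C11 calls stand in for the mi_atomic_* macros used in the patch.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef _Atomic(uintptr_t) demo_once_t;

// Returns true only for the single caller that wins the race to set 0 -> 1,
// mirroring the logic of mi_atomic_once() from PATCH 41.
static bool demo_once(demo_once_t* once) {
  if (atomic_load_explicit(once, memory_order_relaxed) != 0) return false;  // quick test
  uintptr_t expected = 0;
  return atomic_compare_exchange_strong_explicit(once, &expected, (uintptr_t)1,
                                                 memory_order_acq_rel,
                                                 memory_order_acquire);
}

static demo_once_t demo_process_init_flag;   // zero-initialized, like the static in mi_process_init

static void demo_process_init(void) {
  if (!demo_once(&demo_process_init_flag)) return;  // later or concurrent callers back off
  puts("process init runs exactly once");
}

int main(void) {
  demo_process_init();
  demo_process_init();  // second call is a no-op
  return 0;
}

Note that a thread losing the CAS returns false immediately rather than waiting for the winner
to finish, which matches how mi_process_init uses the guard: a duplicate initialization attempt
is simply skipped.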
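
PATCH 42 only documents a requirement (a unique, nonzero id per thread); it does not prescribe
how a platform meets it. One portable way, shown below purely as an illustration, is to take the
address of a thread-local object: a valid object address is never zero, and thread-local storage
yields a distinct address in every thread. The demo_thread_id name is invented for this sketch
and is not mimalloc's actual _mi_prim_thread_id implementation on any particular platform.

#include <stdint.h>
#include <stdio.h>

static _Thread_local uint8_t demo_tls_marker;  // each thread gets its own copy at a distinct address

// A unique, nonzero per-thread id: the address of a thread-local object.
static inline uintptr_t demo_thread_id(void) {
  return (uintptr_t)&demo_tls_marker;
}

int main(void) {
  printf("thread id: 0x%zx\n", (size_t)demo_thread_id());
  return 0;
}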