diff --git a/CMakeLists.txt b/CMakeLists.txt index 7018b73c..fa692806 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,8 +101,8 @@ if(MI_OVERRIDE) message(STATUS " WARNING: interpose usually also needs zone overriding (use -DMI_OSX_INTERPOSE=ON)") endif() endif() - if((NOT MI_USE_CXX) AND MI_OVERRIDE) - message(STATUS " WARNING: if overriding C++ new/delete, it is best to build mimalloc with a C++ compiler (use -DMI_USE_CXX=ON)") + if(MI_USE_CXX AND MI_OSX_INTERPOSE) + message(STATUS " WARNING: if dynamically overriding malloc/free, it is more reliable to build mimalloc as C code (use -DMI_USE_CXX=OFF)") endif() endif() endif() @@ -173,7 +173,7 @@ if(MI_DEBUG_UBSAN) if(CMAKE_BUILD_TYPE MATCHES "Debug") if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") message(STATUS "Build with undefined-behavior sanitizer (MI_DEBUG_UBSAN=ON)") - list(APPEND mi_cflags -fsanitize=undefined -g) + list(APPEND mi_cflags -fsanitize=undefined -g -fno-sanitize-recover=undefined) list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=undefined) if (NOT MI_USE_CXX) message(STATUS "(switch to use C++ due to MI_DEBUG_UBSAN)") @@ -190,7 +190,7 @@ endif() if(MI_USE_CXX) message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)") set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX ) - set_source_files_properties(src/static.c test/test-api.c test/test-stress PROPERTIES LANGUAGE CXX ) + set_source_files_properties(src/static.c test/test-api.c test/test-api-fill test/test-stress PROPERTIES LANGUAGE CXX ) if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang|Clang") list(APPEND mi_cflags -Wno-deprecated) endif() diff --git a/bin/minject.exe b/bin/minject.exe index e5f87800..625816f1 100644 Binary files a/bin/minject.exe and b/bin/minject.exe differ diff --git a/bin/minject32.exe b/bin/minject32.exe index d0181028..6857ad0c 100644 Binary files a/bin/minject32.exe and b/bin/minject32.exe differ diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 552551db..8063afe6 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 2) set(mi_version_minor 0) -set(mi_version_patch 4) +set(mi_version_patch 6) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc-new-delete.h b/include/mimalloc-new-delete.h index ba208f05..2749a0be 100644 --- a/include/mimalloc-new-delete.h +++ b/include/mimalloc-new-delete.h @@ -25,6 +25,9 @@ terms of the MIT license. A copy of the license can be found in the file void operator delete(void* p) noexcept { mi_free(p); }; void operator delete[](void* p) noexcept { mi_free(p); }; + void operator delete (void* p, const std::nothrow_t&) noexcept { mi_free(p); } + void operator delete[](void* p, const std::nothrow_t&) noexcept { mi_free(p); } + void* operator new(std::size_t n) noexcept(false) { return mi_new(n); } void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); } @@ -41,9 +44,11 @@ terms of the MIT license. 
A copy of the license can be found in the file
   void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<std::size_t>(al)); }
   void operator delete  (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<std::size_t>(al)); };
   void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<std::size_t>(al)); };
-
-  void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<std::size_t>(al)); }
-  void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<std::size_t>(al)); }
+  void operator delete  (void* p, std::align_val_t al, const std::nothrow_t& tag) noexcept { mi_free_aligned(p, static_cast<std::size_t>(al)); }
+  void operator delete[](void* p, std::align_val_t al, const std::nothrow_t& tag) noexcept { mi_free_aligned(p, static_cast<std::size_t>(al)); }
+
+  void* operator new  (std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<std::size_t>(al)); }
+  void* operator new[](std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<std::size_t>(al)); }
   void* operator new  (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<std::size_t>(al)); }
   void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<std::size_t>(al)); }
 #endif
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 5906c557..94bb5d30 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -173,7 +173,6 @@ typedef int32_t mi_ssize_t;
 #define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
 #define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32MiB on 64-bit
 #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
-#define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)
 
 // Maximum number of size classes. (spaced exponentially in 12.5% increments)
 #define MI_BIN_HUGE (73U)
@@ -189,7 +188,7 @@ typedef int32_t mi_ssize_t;
 #define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1)
 
 // Used as a special value to encode block sizes in 32 bits.
-#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX)
+#define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB))
 
 // blocks up to this size are always allocated aligned
 #define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE)
diff --git a/include/mimalloc.h b/include/mimalloc.h
index 08805845..5e4c3743 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #ifndef MIMALLOC_H
 #define MIMALLOC_H
 
-#define MI_MALLOC_VERSION 204   // major + 2 digits minor
+#define MI_MALLOC_VERSION 206   // major + 2 digits minor
 
 // ------------------------------------------------------
 // Compiler specific attributes
diff --git a/readme.md b/readme.md
index 635d983e..2e5a2882 100644
--- a/readme.md
+++ b/readme.md
@@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac
 Initially developed by Daan Leijen for the run-time systems of the
 [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
 
-Latest release tag: `v2.0.3` (beta, 2021-11-14).
-Latest stable tag: `v1.7.3` (2021-11-14).
+Latest release tag: `v2.0.5` (alpha, 2022-02-14).
+Latest stable tag: `v1.7.5` (2022-02-14).
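As a quick illustration of the version bump above (a minimal sketch, assuming only the public `mi_version()` API declared in `mimalloc.h`, which returns the same value as the library's `MI_MALLOC_VERSION`), a program can check at run time that the headers it was compiled against match the mimalloc library it actually linked or preloaded:

    #include <stdio.h>
    #include <mimalloc.h>

    int main(void) {
      // mi_version() reports the version of the linked library in the same
      // encoding as MI_MALLOC_VERSION (206 here).
      if (mi_version() != MI_MALLOC_VERSION) {
        fprintf(stderr, "mimalloc: header version %d but library version %d\n",
                MI_MALLOC_VERSION, mi_version());
      }
      return 0;
    }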
mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -77,6 +77,12 @@ Note: the `v2.x` beta has a new algorithm for managing internal mimalloc pages t and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. +* 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on + Windows 11, fix compilation with musl, potentially reduced + committed memory, add `bin/minject` for Windows, + improved wasm support, faster aligned allocation, + various small fixes. + * 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including M1), improved performance for v2 for large objects, Python integration improvements, more standard installation directories, various small fixes. diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c index a88186bc..41d0a386 100644 --- a/src/alloc-override-osx.c +++ b/src/alloc-override-osx.c @@ -43,7 +43,7 @@ extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_im static size_t zone_size(malloc_zone_t* zone, const void* p) { MI_UNUSED(zone); - //if (!mi_is_in_heap_region(p)){ return 0; } // not our pointer, bail out + if (!mi_is_in_heap_region(p)){ return 0; } // not our pointer, bail out return mi_usable_size(p); } @@ -64,7 +64,7 @@ static void* zone_valloc(malloc_zone_t* zone, size_t size) { static void zone_free(malloc_zone_t* zone, void* p) { MI_UNUSED(zone); - mi_free(p); + mi_cfree(p); } static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) { @@ -373,7 +373,7 @@ __attribute__((used)) static const struct mi_interpose_s _mi_zone_interposes[] MI_INTERPOSE_MI(_malloc_fork_child), MI_INTERPOSE_MI(_malloc_fork_parent), MI_INTERPOSE_MI(_malloc_fork_prepare), - + MI_INTERPOSE_ZONE(zone_batch_free), MI_INTERPOSE_ZONE(zone_batch_malloc), MI_INTERPOSE_ZONE(zone_calloc), diff --git a/src/alloc-override.c b/src/alloc-override.c index 6bbe4aac..12e9e0d6 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -16,6 +16,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32)) #if defined(__APPLE__) +#include mi_decl_externc void vfree(void* p); mi_decl_externc size_t malloc_size(const void* p); mi_decl_externc size_t malloc_good_size(size_t size); @@ -77,7 +78,9 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; MI_INTERPOSE_MI(valloc), MI_INTERPOSE_MI(malloc_size), MI_INTERPOSE_MI(malloc_good_size), + #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 MI_INTERPOSE_MI(aligned_alloc), + #endif #ifdef MI_OSX_ZONE // we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely MI_INTERPOSE_MI(free), @@ -91,15 +94,18 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; #ifdef __cplusplus extern "C" { - void _ZdlPv(void* p); // delete - void _ZdaPv(void* p); // delete[] - void _ZdlPvm(void* p, size_t n); // delete - void _ZdaPvm(void* p, size_t n); // delete[] - void* _Znwm(size_t n); // new - void* _Znam(size_t n); // new[] - void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new nothrow - void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new[] nothrow - } + #endif + void _ZdlPv(void* p); // delete + void _ZdaPv(void* p); // delete[] + void _ZdlPvm(void* p, size_t n); // delete + void _ZdaPvm(void* p, size_t n); // delete[] + void* _Znwm(size_t n); // new + void* _Znam(size_t n); // new[] + void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new nothrow + void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new[] nothrow + #ifdef __cplusplus + } + #endif __attribute__((used)) static struct mi_interpose_s _mi_cxx_interposes[] __attribute__((section("__DATA, __interpose"))) = { MI_INTERPOSE_FUN(_ZdlPv,mi_free), @@ -111,7 +117,6 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; MI_INTERPOSE_FUN(_ZnwmRKSt9nothrow_t,mi_new_nothrow), MI_INTERPOSE_FUN(_ZnamRKSt9nothrow_t,mi_new_nothrow), }; - #endif // __cplusplus #elif defined(_MSC_VER) // cannot override malloc unless using a dll. 
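For context, a minimal sketch of the dyld interposing mechanism that the `MI_INTERPOSE_*` entries above rely on (illustrative only; `counting_malloc` and `malloc_calls` are hypothetical names, and a real override additionally needs the zone handling from `alloc-override-osx.c`). Each `{ replacement, target }` tuple placed in the `__DATA, __interpose` section asks dyld to rebind calls to `target` in other loaded images to `replacement`:

    #include <stdlib.h>

    // Replacement that counts allocations and forwards to the original allocator.
    // dyld is expected not to apply interpose tuples to the image that defines
    // them, so the malloc() call below should reach the system implementation.
    static size_t malloc_calls = 0;
    static void* counting_malloc(size_t n) {
      malloc_calls++;                 // not thread-safe; fine for a sketch
      return malloc(n);
    }

    // The interpose tuple itself, analogous to `struct mi_interpose_s` above.
    __attribute__((used)) static const struct {
      const void* replacement;
      const void* target;
    } _interpose_malloc[] __attribute__((section("__DATA, __interpose"))) = {
      { (const void*)&counting_malloc, (const void*)&malloc },
    };

This is the same tuple layout that the `MI_INTERPOSE_MI`/`MI_INTERPOSE_FUN` macros generate for `mi_malloc`, `mi_free`, and the mangled C++ `operator new`/`delete` symbols.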
@@ -161,7 +166,9 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast(al)); } void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast(al)); }; void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast(al)); }; - + void operator delete (void* p, std::align_val_t al, const std::nothrow_t& tag) noexcept { mi_free_aligned(p, static_cast(al)); } + void operator delete[](void* p, std::align_val_t al, const std::nothrow_t& tag) noexcept { mi_free_aligned(p, static_cast(al)); } + void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast(al)); } void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast(al)); } void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast(al)); } diff --git a/src/alloc.c b/src/alloc.c index 5a93a9e8..92f04daf 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -45,7 +45,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz #if (MI_STAT>0) const size_t bsize = mi_page_usable_block_size(page); - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { mi_heap_stat_increase(heap, normal, bsize); mi_heap_stat_counter_increase(heap, normal_count, 1); #if (MI_STAT>1) @@ -372,20 +372,26 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block // only maintain stats for smaller objects if requested #if (MI_STAT>0) static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { -#if (MI_STAT < 2) + #if (MI_STAT < 2) MI_UNUSED(block); -#endif + #endif mi_heap_t* const heap = mi_heap_get_default(); - const size_t bsize = mi_page_usable_block_size(page); -#if (MI_STAT>1) + const size_t bsize = mi_page_usable_block_size(page); + #if (MI_STAT>1) const size_t usize = mi_page_usable_size_of(page, block); mi_heap_stat_decrease(heap, malloc, usize); -#endif - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + #endif + if (bsize <= MI_MEDIUM_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, normal, bsize); -#if (MI_STAT > 1) + #if (MI_STAT > 1) mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); -#endif + #endif + } + else if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, large, bsize); + } + else { + mi_heap_stat_decrease(heap, huge, bsize); } } #else @@ -547,6 +553,7 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms #if (MI_DEBUG>0 || MI_SECURE>=4) if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p); + return NULL; } #endif return segment; diff --git a/src/options.c b/src/options.c index ba2a1d14..d7f4c29f 100644 --- a/src/options.c +++ b/src/options.c @@ -19,8 +19,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif -static size_t mi_max_error_count = 16; // stop outputting errors after this -static size_t mi_max_warning_count = 16; // stop outputting warnings after this +static long mi_max_error_count = 16; // stop outputting errors after this (use < 0 for no limit) +static long mi_max_warning_count = 16; // stop outputting warnings after this (use < 0 for no limit) static void mi_add_stderr_output(void); @@ -163,10 +163,22 @@ void mi_option_disable(mi_option_t option) { static void mi_out_stderr(const char* msg, void* arg) { MI_UNUSED(arg); + if (msg == NULL) return; #ifdef _WIN32 // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. - if (!_mi_preloading()) { _cputs(msg); } + if (!_mi_preloading()) { + // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console + static HANDLE hcon = INVALID_HANDLE_VALUE; + if (hcon == INVALID_HANDLE_VALUE) { + hcon = GetStdHandle(STD_ERROR_HANDLE); + } + const size_t len = strlen(msg); + if (hcon != INVALID_HANDLE_VALUE && len > 0 && len < UINT32_MAX) { + DWORD written = 0; + WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); + } + } #else fputs(msg, stderr); #endif @@ -322,11 +334,22 @@ void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { va_end(args); } +static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) { + if (prefix != NULL && strlen(prefix) <= 32 && !_mi_is_main_thread()) { + char tprefix[64]; + snprintf(tprefix, sizeof(tprefix), "%sthread 0x%zx: ", prefix, _mi_thread_id()); + mi_vfprintf(out, arg, tprefix, fmt, args); + } + else { + mi_vfprintf(out, arg, prefix, fmt, args); + } +} + void _mi_trace_message(const char* fmt, ...) { if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher va_list args; va_start(args, fmt); - mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); + mi_vfprintf_thread(NULL, NULL, "mimalloc: ", fmt, args); va_end(args); } @@ -339,17 +362,21 @@ void _mi_verbose_message(const char* fmt, ...) { } static void mi_show_error_message(const char* fmt, va_list args) { - if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return; - mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); + if (!mi_option_is_enabled(mi_option_verbose)) { + if (!mi_option_is_enabled(mi_option_show_errors)) return; + if (mi_max_error_count >= 0 && (long)mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return; + } + mi_vfprintf_thread(NULL, NULL, "mimalloc: error: ", fmt, args); } void _mi_warning_message(const char* fmt, ...) 
{ - if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment_acq_rel(&warning_count) > mi_max_warning_count) return; + if (!mi_option_is_enabled(mi_option_verbose)) { + if (!mi_option_is_enabled(mi_option_show_errors)) return; + if (mi_max_warning_count >= 0 && (long)mi_atomic_increment_acq_rel(&warning_count) > mi_max_warning_count) return; + } va_list args; va_start(args,fmt); - mi_vfprintf(NULL, NULL, "mimalloc: warning: ", fmt, args); + mi_vfprintf_thread(NULL, NULL, "mimalloc: warning: ", fmt, args); va_end(args); } diff --git a/src/os.c b/src/os.c index a7cc3cab..4a597632 100644 --- a/src/os.c +++ b/src/os.c @@ -67,7 +67,8 @@ terms of the MIT license. A copy of the license can be found in the file On windows initializes support for aligned allocation and large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). ----------------------------------------------------------- */ -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); static void* mi_align_up_ptr(void* p, size_t alignment) { return (void*)_mi_align_up((uintptr_t)p, alignment); @@ -283,24 +284,38 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats if (addr == NULL || size == 0) return true; // || _mi_os_is_huge_reserved(addr) bool err = false; #if defined(_WIN32) + DWORD errcode = 0; err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); + if (err) { errcode = GetLastError(); } + if (errcode == ERROR_INVALID_ADDRESS) { + // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside + // the memory region returned by VirtualAlloc; in that case we need to free using + // the start of the region. 
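+    // (VirtualQuery fills info.AllocationBase with the base address that the
+    //  original VirtualAlloc call returned for this region, so we retry the
+    //  release from that base address below.)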
+ MEMORY_BASIC_INFORMATION info = { 0, 0 }; + VirtualQuery(addr, &info, sizeof(info)); + if (info.AllocationBase < addr) { + errcode = 0; + err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0); + if (err) { errcode = GetLastError(); } + } + } + if (errcode != 0) { + _mi_warning_message("unable to release OS memory: error code 0x%x, addr: %p, size: %zu\n", errcode, addr, size); + } #elif defined(MI_USE_SBRK) || defined(__wasi__) - err = 0; // sbrk heap cannot be shrunk + err = false; // sbrk heap cannot be shrunk #else err = (munmap(addr, size) == -1); -#endif - if (was_committed) _mi_stat_decrease(&stats->committed, size); - _mi_stat_decrease(&stats->reserved, size); if (err) { - _mi_warning_message("munmap failed: %s, addr 0x%8li, size %lu\n", strerror(errno), (size_t)addr, size); - return false; - } - else { - return true; + _mi_warning_message("unable to release OS memory: %s, addr: %p, size: %zu\n", strerror(errno), addr, size); } +#endif + if (was_committed) { _mi_stat_decrease(&stats->committed, size); } + _mi_stat_decrease(&stats->reserved, size); + return !err; } -#if !defined(MI_USE_SBRK) && !defined(__wasi__) +#if !(defined(__wasi__) || defined(MI_USE_SBRK) || defined(MAP_ALIGNED)) static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size); #endif @@ -320,15 +335,8 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment if (hint != NULL) { void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE); if (p != NULL) return p; - // for robustness always fall through in case of an error - /* - DWORD err = GetLastError(); - if (err != ERROR_INVALID_ADDRESS && // If linked with multiple instances, we may have tried to allocate at an already allocated area (#210) - err != ERROR_INVALID_PARAMETER) { // Windows7 instability (#230) - return NULL; - } - */ - _mi_warning_message("unable to allocate hinted aligned OS memory (%zu bytes, error code: %x, address: %p, alignment: %d, flags: %x)\n", size, GetLastError(), hint, try_alignment, flags); + _mi_warning_message("unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags); + // fall through on error } } #endif @@ -342,7 +350,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment param.Pointer = &reqs; void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); if (p != NULL) return p; - _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: %x, address: %p, alignment: %d, flags: %x)\n", size, GetLastError(), addr, try_alignment, flags); + _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); // fall through on error } #endif @@ -354,6 +362,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, mi_assert_internal(!(large_only && !allow_large)); static _Atomic(size_t) large_page_try_ok; // = 0; void* p = NULL; + // Try to allocate large OS pages (2MiB) if allowed or required. 
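+  // (note: `large_page_try_ok` acts as a back-off counter: after a failed
+  //  large-page attempt the next allocations skip MEM_LARGE_PAGES for a while,
+  //  since VirtualAlloc calls that fail with large pages are expensive)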
if ((large_only || use_large_os_page(size, try_alignment)) && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); @@ -373,12 +382,13 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, } } } + // Fall back to regular page allocation if (p == NULL) { *is_large = ((flags&MEM_LARGE_PAGES) != 0); p = mi_win_virtual_allocx(addr, size, try_alignment, flags); } if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, GetLastError(), addr, large_only, allow_large); + _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); } return p; } @@ -659,7 +669,7 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) if (hint%try_alignment != 0) return NULL; return (void*)hint; } -#elif defined(__wasi__) || defined(MI_USE_SBRK) +#elif defined(__wasi__) || defined(MI_USE_SBRK) || defined(MAP_ALIGNED) // no need for mi_os_get_aligned_hint #else static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { @@ -692,7 +702,7 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo #if defined(_WIN32) int flags = MEM_RESERVE; - if (commit) flags |= MEM_COMMIT; + if (commit) { flags |= MEM_COMMIT; } p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); #elif defined(MI_USE_SBRK) || defined(__wasi__) MI_UNUSED(allow_large); @@ -716,6 +726,7 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(is_large != NULL); if (!commit) allow_large = false; if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; size = _mi_align_up(size, _mi_os_page_size()); @@ -723,45 +734,27 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // try first with a hint (this will be aligned directly on Win 10+ or BSD) void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats); if (p == NULL) return NULL; - + // if not aligned, free it, overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { mi_os_mem_free(p, size, commit, stats); + _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (%zu bytes, address: %p, alignment: %zu, commit: %d)\n", size, p, alignment, commit); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow - size_t over_size = size + alignment; + const size_t over_size = size + alignment; #if _WIN32 - // over-allocate and than re-allocate exactly at an aligned address in there. - // this may fail due to threads allocating at the same time so we - // retry this at most 3 times before giving up. 
- // (we can not decommit around the overallocation on Windows, because we can only - // free the original pointer, not one pointing inside the area) - int flags = MEM_RESERVE; - if (commit) flags |= MEM_COMMIT; - for (int tries = 0; tries < 3; tries++) { - // over-allocate to determine a virtual memory range - p = mi_os_mem_alloc(over_size, alignment, commit, false, is_large, stats); - if (p == NULL) return NULL; // error - if (((uintptr_t)p % alignment) == 0) { - // if p happens to be aligned, just decommit the left-over area - _mi_os_decommit((uint8_t*)p + size, over_size - size, stats); - break; - } - else { - // otherwise free and allocate at an aligned address in there - mi_os_mem_free(p, over_size, commit, stats); - void* aligned_p = mi_align_up_ptr(p, alignment); - p = mi_win_virtual_alloc(aligned_p, size, alignment, flags, false, allow_large, is_large); - if (p != NULL) { - _mi_stat_increase(&stats->reserved, size); - if (commit) { _mi_stat_increase(&stats->committed, size); } - } - if (p == aligned_p) break; // success! - if (p != NULL) { // should not happen? - mi_os_mem_free(p, size, commit, stats); - p = NULL; - } - } + // over-allocate uncommitted (virtual) memory + p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats); + if (p == NULL) return NULL; + + // set p to the aligned part in the full region + // note: this is dangerous on Windows as VirtualFree needs the actual region pointer + // but in mi_os_mem_free we handle this (hopefully exceptional) situation. + p = mi_align_up_ptr(p, alignment); + + // explicitly commit only the aligned part + if (commit) { + _mi_os_commit(p, size, NULL, stats); } #else // overallocate... diff --git a/src/page.c b/src/page.c index 2d9a7033..1849dc8f 100644 --- a/src/page.c +++ b/src/page.c @@ -252,7 +252,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size } mi_assert_internal(pq==NULL || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE); mi_page_init(heap, page, block_size, heap->tld); - _mi_stat_increase(&heap->tld->stats.pages, 1); + mi_heap_stat_increase(heap, pages, 1); if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL mi_assert_expensive(_mi_page_is_valid(page)); return page; @@ -368,17 +368,6 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_page_set_has_aligned(page, false); mi_heap_t* heap = mi_page_heap(page); - const size_t bsize = mi_page_block_size(page); - if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) { - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&heap->tld->stats.large, bsize); - } - else { - // not strictly necessary as we never get here for a huge page - mi_assert_internal(false); - _mi_stat_decrease(&heap->tld->stats.huge, bsize); - } - } // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) @@ -705,7 +694,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p page = next; } // for each page - mi_stat_counter_increase(heap->tld->stats.searches, count); + mi_heap_stat_counter_increase(heap, searches, count); if (page == NULL) { _mi_heap_collect_retired(heap, false); // perhaps make a page available? @@ -791,7 +780,7 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = (is_huge ? 
NULL : mi_page_queue(heap, block_size)); mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size); if (page != NULL) { - const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding + const size_t bsize = mi_page_usable_block_size(page); // note: includes padding mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(bsize >= size); @@ -806,12 +795,12 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); } if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - _mi_stat_increase(&heap->tld->stats.large, bsize); - _mi_stat_counter_increase(&heap->tld->stats.large_count, 1); + mi_heap_stat_increase(heap, large, bsize); + mi_heap_stat_counter_increase(heap, large_count, 1); } else { - _mi_stat_increase(&heap->tld->stats.huge, bsize); - _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1); + mi_heap_stat_increase(heap, huge, bsize); + mi_heap_stat_counter_increase(heap, huge_count, 1); } } return page; diff --git a/src/random.c b/src/random.c index 0b44c8b9..5057a623 100644 --- a/src/random.c +++ b/src/random.c @@ -195,6 +195,7 @@ static bool os_random_buf(void* buf, size_t buf_len) { #elif defined(__APPLE__) #include #if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 +#include #include #endif static bool os_random_buf(void* buf, size_t buf_len) { diff --git a/src/segment.c b/src/segment.c index c4cf9875..8d3eebe5 100644 --- a/src/segment.c +++ b/src/segment.c @@ -762,7 +762,8 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i } // and also for the last one (if not set already) (the last one is needed for coalescing) - mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; + // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543) + mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_index + slice_count - 1]; if (last < mi_segment_slices_end(segment) && last >= slice) { last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_count-1)); last->slice_count = 0; diff --git a/test/main-override-static.c b/test/main-override-static.c index 33671a1a..393199e1 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -8,172 +8,6 @@ #include // redefines malloc etc. -#include -#include - -#define MI_INTPTR_SIZE 8 -#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) - -#define MI_BIN_HUGE 100 -//#define MI_ALIGN2W - -// Bit scan reverse: return the index of the highest bit. -static inline uint8_t mi_bsr32(uint32_t x); - -#if defined(_MSC_VER) -#include -#include -static inline uint8_t mi_bsr32(uint32_t x) { - uint32_t idx; - _BitScanReverse((DWORD*)&idx, x); - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -static inline uint8_t mi_bsr32(uint32_t x) { - return (31 - __builtin_clz(x)); -} -#else -static inline uint8_t mi_bsr32(uint32_t x) { - // de Bruijn multiplication, see - static const uint8_t debruijn[32] = { - 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, - 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, - }; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x++; - return debruijn[(x*0x076be629) >> 27]; -} -#endif - -/* -// Bit scan reverse: return the index of the highest bit. 
-uint8_t _mi_bsr(uintptr_t x) { - if (x == 0) return 0; - #if MI_INTPTR_SIZE==8 - uint32_t hi = (x >> 32); - return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); - #elif MI_INTPTR_SIZE==4 - return mi_bsr32(x); - #else - # error "define bsr for non-32 or 64-bit platforms" - #endif -} -*/ - - -static inline size_t _mi_wsize_from_size(size_t size) { - return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); -} - -// Return the bin for a given field size. -// Returns MI_BIN_HUGE if the size is too large. -// We use `wsize` for the size in "machine word sizes", -// i.e. byte size == `wsize*sizeof(void*)`. -extern inline uint8_t _mi_bin8(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { - bin = 1; - } - #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; - } - #endif - else if (wsize > MI_LARGE_WSIZE_MAX) { - bin = MI_BIN_HUGE; - } - else { - #if defined(MI_ALIGN4W) - if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes - #endif - wsize--; - // find the highest bit - uint8_t b = mi_bsr32((uint32_t)wsize); - // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). - // - adjust with 3 because we use do not round the first 8 sizes - // which each get an exact bin - bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; - } - return bin; -} - -extern inline uint8_t _mi_bin4(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { - bin = 1; - } - #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; - } - #endif - else if (wsize > MI_LARGE_WSIZE_MAX) { - bin = MI_BIN_HUGE; - } - else { - uint8_t b = mi_bsr32((uint32_t)wsize); - bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; - } - return bin; -} - -size_t _mi_binx4(size_t bsize) { - if (bsize==0) return 0; - uint8_t b = mi_bsr32((uint32_t)bsize); - if (b <= 1) return bsize; - size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01); - return bin; -} - -size_t _mi_binx8(size_t bsize) { - if (bsize<=1) return bsize; - uint8_t b = mi_bsr32((uint32_t)bsize); - if (b <= 2) return bsize; - size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; - return bin; -} - -void mi_bins() { - //printf(" QNULL(1), /* 0 */ \\\n "); - size_t last_bin = 0; - size_t min_bsize = 0; - size_t last_bsize = 0; - for (size_t bsize = 1; bsize < 2*1024; bsize++) { - size_t size = bsize * 64 * 1024; - size_t bin = _mi_binx8(bsize); - if (bin != last_bin) { - printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin); - //printf("QNULL(%6zd), ", wsize); - //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); - last_bin = bin; - min_bsize = bsize; - } - last_bsize = bsize; - } -} - static void double_free1(); static void double_free2(); static void double_free3(); @@ -186,7 +20,7 @@ static void test_aslr(void); static void test_process_info(void); static void test_reserved(void); static void negative_stat(void); - +static void alloc_huge(void); int main() { mi_version(); @@ -203,6 +37,7 @@ int main() { // invalid_free(); // 
test_reserved(); // negative_stat(); + alloc_huge(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -216,7 +51,7 @@ int main() { free(p1); free(p2); free(s); - + /* now test if override worked by allocating/freeing across the api's*/ //p1 = mi_malloc(32); //free(p1); @@ -391,4 +226,182 @@ static void negative_stat(void) { *p = 100; mi_free(p); mi_stats_print_out(NULL, NULL); -} \ No newline at end of file +} + +static void alloc_huge(void) { + void* p = mi_malloc(67108872); + mi_free(p); +} + + +// ---------------------------- +// bin size experiments +// ------------------------------ + +#if 0 +#include +#include + +#define MI_INTPTR_SIZE 8 +#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) + +#define MI_BIN_HUGE 100 +//#define MI_ALIGN2W + +// Bit scan reverse: return the index of the highest bit. +static inline uint8_t mi_bsr32(uint32_t x); + +#if defined(_MSC_VER) +#include +#include +static inline uint8_t mi_bsr32(uint32_t x) { + uint32_t idx; + _BitScanReverse((DWORD*)&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +static inline uint8_t mi_bsr32(uint32_t x) { + return (31 - __builtin_clz(x)); +} +#else +static inline uint8_t mi_bsr32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, + 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, + }; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return debruijn[(x*0x076be629) >> 27]; +} +#endif + +/* +// Bit scan reverse: return the index of the highest bit. +uint8_t _mi_bsr(uintptr_t x) { + if (x == 0) return 0; + #if MI_INTPTR_SIZE==8 + uint32_t hi = (x >> 32); + return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); + #elif MI_INTPTR_SIZE==4 + return mi_bsr32(x); + #else + # error "define bsr for non-32 or 64-bit platforms" + #endif +} +*/ + + +static inline size_t _mi_wsize_from_size(size_t size) { + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); +} + +// Return the bin for a given field size. +// Returns MI_BIN_HUGE if the size is too large. +// We use `wsize` for the size in "machine word sizes", +// i.e. byte size == `wsize*sizeof(void*)`. +extern inline uint8_t _mi_bin8(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } +#if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } +#endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { +#if defined(MI_ALIGN4W) + if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes +#endif + wsize--; + // find the highest bit + uint8_t b = mi_bsr32((uint32_t)wsize); + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). 
+ // - adjust with 3 because we use do not round the first 8 sizes + // which each get an exact bin + bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + } + return bin; +} + +static inline uint8_t _mi_bin4(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } +#if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } +#endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { + uint8_t b = mi_bsr32((uint32_t)wsize); + bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; + } + return bin; +} + +static size_t _mi_binx4(size_t bsize) { + if (bsize==0) return 0; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 1) return bsize; + size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01); + return bin; +} + +static size_t _mi_binx8(size_t bsize) { + if (bsize<=1) return bsize; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 2) return bsize; + size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; + return bin; +} + +static void mi_bins(void) { + //printf(" QNULL(1), /* 0 */ \\\n "); + size_t last_bin = 0; + size_t min_bsize = 0; + size_t last_bsize = 0; + for (size_t bsize = 1; bsize < 2*1024; bsize++) { + size_t size = bsize * 64 * 1024; + size_t bin = _mi_binx8(bsize); + if (bin != last_bin) { + printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin); + //printf("QNULL(%6zd), ", wsize); + //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); + last_bin = bin; + min_bsize = bsize; + } + last_bsize = bsize; + } +} +#endif \ No newline at end of file diff --git a/test/test-api.c b/test/test-api.c index 7ce6f111..0302464e 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -72,6 +72,10 @@ int main(void) { CHECK_BODY("calloc0",{ result = (mi_usable_size(mi_calloc(0,1000)) <= 16); }); + CHECK_BODY("malloc-large",{ // see PR #544. + void* p = mi_malloc(67108872); + mi_free(p); + }); // --------------------------------------------------- // Extended