diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4729e5b5..5fc1808e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,7 +26,7 @@ option(MI_BUILD_OBJECT "Build object library" ON)
 option(MI_BUILD_TESTS "Build test executables" ON)
 option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
 option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
-option(MI_DEBUG_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
+option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
 option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF)
 option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc." OFF)
 option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version" OFF)
@@ -207,9 +207,9 @@ if(MI_TRACK_ETW)
   endif()
 endif()

-if(MI_DEBUG_GUARDED)
-  message(STATUS "Compile guard pages behind certain object allocations (MI_DEBUG_GUARDED=ON)")
-  list(APPEND mi_defines MI_DEBUG_GUARDED=1)
+if(MI_GUARDED)
+  message(STATUS "Compile guard pages behind certain object allocations (MI_GUARDED=ON)")
+  list(APPEND mi_defines MI_GUARDED=1)
   if(NOT MI_NO_PADDING)
     message(STATUS "  Disabling padding due to guard pages (MI_NO_PADDING=ON)")
     set(MI_NO_PADDING ON)
@@ -320,13 +320,13 @@ if(MI_WIN_USE_FLS)
 endif()

-  # Check /proc/cpuinfo for an SV39 MMU and define a constant if one is
-  # found. We will want to skip the aligned hinting in that case. Issue #939, #949
+  # Check /proc/cpuinfo for an SV39 MMU and limit the virtual address bits.
+  # (this will skip the aligned hinting in that case. Issue #939, #949)
   if (EXISTS /proc/cpuinfo)
     file(STRINGS /proc/cpuinfo mi_sv39_mmu REGEX "^mmu[ \t]+:[ \t]+sv39$")
     if (mi_sv39_mmu)
-      MESSAGE( STATUS "Disable aligned hints (SV39 MMU detected)" )
-      list(APPEND mi_defines MI_NO_ALIGNED_HINT=1)
+      MESSAGE( STATUS "Set virtual address bits to 39 (SV39 MMU detected)" )
+      list(APPEND mi_defines MI_DEFAULT_VIRTUAL_ADDRESS_BITS=39)
     endif()
   endif()
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 6f1cd256..a481ac48 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -15,7 +15,7 @@ trigger:

 jobs:
 - job:
-  displayName: Windows
+  displayName: Windows 2022
   pool:
     vmImage:
       windows-2022
@@ -52,7 +52,7 @@ jobs:
 #    artifact: mimalloc-windows-$(BuildType)

 - job:
-  displayName: Linux
+  displayName: Ubuntu 22.04
   pool:
     vmImage:
       ubuntu-22.04
@@ -117,8 +117,8 @@ jobs:
       CC: clang
       CXX: clang
       BuildType: debug-guarded-clang
-      cmakeExtraArgs: -DCMAKE_BUILD_TYPE=RelWithDebInfo -DMI_DEBUG_FULL=ON -DMI_DEBUG_GUARDED=ON
-
+      cmakeExtraArgs: -DCMAKE_BUILD_TYPE=RelWithDebInfo -DMI_DEBUG_FULL=ON -DMI_GUARDED=ON
+
   steps:
   - task: CMake@1
     inputs:
@@ -129,16 +129,16 @@ jobs:
   - script: ctest --verbose --timeout 180
     workingDirectory: $(BuildType)
     displayName: CTest
-    env:
-      MIMALLOC_DEBUG_GUARDED_MAX: 1024
+    env:
+      MIMALLOC_GUARDED_SAMPLE_RATE: 1000
 # - upload: $(Build.SourcesDirectory)/$(BuildType)
 #   artifact: mimalloc-ubuntu-$(BuildType)

 - job:
-  displayName: macOS
+  displayName: macOS 14 (Sonoma)
   pool:
     vmImage:
-      macOS-latest
+      macOS-14
   strategy:
     matrix:
       Debug:
@@ -164,35 +164,145 @@ jobs:
 # - upload: $(Build.SourcesDirectory)/$(BuildType)
 #   artifact: mimalloc-macos-$(BuildType)

-# - job:
-#   displayName: Windows-2017
-#   pool:
-#     vmImage:
-#       vs2017-win2016
-#   strategy:
-#     matrix:
-#       Debug:
-#         BuildType: debug
-#         cmakeExtraArgs: -A x64 -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
-#         MSBuildConfiguration: Debug
-#       Release:
-#         BuildType: release
-#         cmakeExtraArgs: -A x64 -DCMAKE_BUILD_TYPE=Release
-#         MSBuildConfiguration: Release
-#       Secure:
-#         BuildType: secure
-#         cmakeExtraArgs: -A x64 -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
-#         MSBuildConfiguration: Release
-#   steps:
-#   - task: CMake@1
-#     inputs:
-#       workingDirectory: $(BuildType)
-#       cmakeArgs: .. $(cmakeExtraArgs)
-#   - task: MSBuild@1
-#     inputs:
-#       solution: $(BuildType)/libmimalloc.sln
-#       configuration: '$(MSBuildConfiguration)'
-#   - script: |
-#       cd $(BuildType)
-#       ctest --verbose --timeout 180
-#     displayName: CTest
+# ----------------------------------------------------------
+# Other OS versions (just debug mode)
+# ----------------------------------------------------------
+
+- job:
+  displayName: Windows 2019
+  pool:
+    vmImage:
+      windows-2019
+  strategy:
+    matrix:
+      Debug:
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+        MSBuildConfiguration: Debug
+      Release:
+        BuildType: release
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+        MSBuildConfiguration: Release
+  steps:
+  - task: CMake@1
+    inputs:
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
+  - task: MSBuild@1
+    inputs:
+      solution: $(BuildType)/libmimalloc.sln
+      configuration: '$(MSBuildConfiguration)'
+      msbuildArguments: -m
+  - script: ctest --verbose --timeout 180 -C $(MSBuildConfiguration)
+    workingDirectory: $(BuildType)
+    displayName: CTest
+
+- job:
+  displayName: Ubuntu 24.04
+  pool:
+    vmImage:
+      ubuntu-24.04
+  strategy:
+    matrix:
+      Debug:
+        CC: gcc
+        CXX: g++
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Debug++:
+        CC: gcc
+        CXX: g++
+        BuildType: debug-cxx
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON
+      Debug Clang:
+        CC: clang
+        CXX: clang++
+        BuildType: debug-clang
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Debug++ Clang:
+        CC: clang
+        CXX: clang++
+        BuildType: debug-clang-cxx
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON
+      Release Clang:
+        CC: clang
+        CXX: clang++
+        BuildType: release-clang
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+  steps:
+  - task: CMake@1
+    inputs:
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
+  - script: make -j$(nproc) -C $(BuildType)
+    displayName: Make
+  - script: ctest --verbose --timeout 180
+    workingDirectory: $(BuildType)
+    displayName: CTest
+
+- job:
+  displayName: Ubuntu 20.04
+  pool:
+    vmImage:
+      ubuntu-20.04
+  strategy:
+    matrix:
+      Debug:
+        CC: gcc
+        CXX: g++
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Debug++:
+        CC: gcc
+        CXX: g++
+        BuildType: debug-cxx
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON
+      Debug Clang:
+        CC: clang
+        CXX: clang++
+        BuildType: debug-clang
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Debug++ Clang:
+        CC: clang
+        CXX: clang++
+        BuildType: debug-clang-cxx
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON
+      Release Clang:
+        CC: clang
+        CXX: clang++
+        BuildType: release-clang
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+  steps:
+  - task: CMake@1
+    inputs:
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
+  - script: make -j$(nproc) -C $(BuildType)
+    displayName: Make
+  - script: ctest --verbose --timeout 180
+    workingDirectory: $(BuildType)
+    displayName: CTest
+
+- job:
+  displayName: macOS 15 (Sequoia)
+  pool:
+    vmImage:
+      macOS-15
+  strategy:
+    matrix:
+      Debug:
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Release:
+        BuildType: release
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+  steps:
+  - task: CMake@1
+    inputs:
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
+  - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType)
+    displayName: Make
+  - script: ctest --verbose --timeout 180
+    workingDirectory: $(BuildType)
+    displayName: CTest
diff --git a/bin/mimalloc-redirect.dll b/bin/mimalloc-redirect.dll
index ed001d64..4702fec0 100644
Binary files a/bin/mimalloc-redirect.dll and b/bin/mimalloc-redirect.dll differ
diff --git a/bin/mimalloc-redirect32.dll b/bin/mimalloc-redirect32.dll
index ec4ff1d5..17c05550 100644
Binary files a/bin/mimalloc-redirect32.dll and b/bin/mimalloc-redirect32.dll differ
diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj
index 9e0b31f2..160f1436 100644
--- a/ide/vs2022/mimalloc.vcxproj
+++ b/ide/vs2022/mimalloc.vcxproj
@@ -116,7 +116,7 @@
       true
       Default
       ../../include
-      MI_DEBUG=4;MI_SECURE=0;%(PreprocessorDefinitions);
+      MI_DEBUG=4;MI_GUARDED=1;%(PreprocessorDefinitions);
       CompileAsCpp
       false
       stdcpp20
diff --git a/include/mimalloc.h b/include/mimalloc.h
index df85a2c0..534f1cbf 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -291,7 +291,7 @@ mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t a
 #endif

-// Experimental: allow sub-processes whose memory segments stay separated (and no reclamation between them) 
+// Experimental: allow sub-processes whose memory segments stay separated (and no reclamation between them)
 // Used for example for separate interpreters in one process.
 typedef void* mi_subproc_id_t;
 mi_decl_export mi_subproc_id_t mi_subproc_main(void);
@@ -310,6 +310,12 @@ mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_ex(int heap_tag, bool al
 // deprecated
 mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;

+// Experimental: objects followed by a guard page.
+// A sample rate of 0 disables guarded objects, while 1 uses a guard page for every object.
+// A seed of 0 uses a random start point. Only objects within the size bound are eligible for guard pages.
+mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed);
+mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max);
+

 // ------------------------------------------------------
 // Convenience
@@ -350,7 +356,7 @@ typedef enum mi_option_e {
   mi_option_deprecated_segment_cache,
   mi_option_deprecated_page_reset,
   mi_option_abandoned_page_purge,       // immediately purge delayed purges on thread termination
-  mi_option_deprecated_segment_reset, 
+  mi_option_deprecated_segment_reset,
   mi_option_eager_commit_delay,         // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
   mi_option_purge_delay,                // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. (=10)
   mi_option_use_numa_nodes,             // 0 = use all available numa nodes, otherwise use at most N nodes.
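A minimal usage sketch of the experimental guarded API declared in the mimalloc.h hunk above (assuming a build with MI_GUARDED=1; the sample rate of 1000, seed of 0, and the 16..1024 byte bound are illustrative values, not defaults from this patch):

  mi_heap_t* heap = mi_heap_new();
  mi_heap_guarded_set_sample_rate(heap, 1000, 0);  // guard ~1 in 1000 eligible allocations; seed 0 picks a random start point
  mi_heap_guarded_set_size_bound(heap, 16, 1024);  // only objects of 16..1024 bytes are eligible for a guard page
  void* p = mi_heap_malloc(heap, 128);             // a sampled block is placed directly in front of an inaccessible guard page
  mi_free(p);                                      // freeing a guarded block unprotects its guard page again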
@@ -367,8 +373,11 @@ typedef enum mi_option_e {
   mi_option_disallow_arena_alloc,       // 1 = do not use arena's for allocation (except if using specific arena id's)
   mi_option_retry_on_oom,               // retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. (only on windows)
   mi_option_visit_abandoned,            // allow visiting heap blocks from abandoned threads (=0)
-  mi_option_debug_guarded_min,          // only used when building with MI_DEBUG_GUARDED: minimal rounded object size for guarded objects (=0)
-  mi_option_debug_guarded_max,          // only used when building with MI_DEBUG_GUARDED: maximal rounded object size for guarded objects (=0)
+  mi_option_guarded_min,                // only used when building with MI_GUARDED: minimal rounded object size for guarded objects (=0)
+  mi_option_guarded_max,                // only used when building with MI_GUARDED: maximal rounded object size for guarded objects (=1GiB)
+  mi_option_guarded_precise,            // disregard minimal alignment requirement to always place guarded blocks exactly in front of a guard page (=0)
+  mi_option_guarded_sample_rate,        // 1 out of N allocations in the min/max range will be guarded (=4000)
+  mi_option_guarded_sample_seed,        // can be set to allow for a (more) deterministic re-execution when a guard page is triggered (=0)
   mi_option_target_segments_per_thread, // experimental (=0)
   _mi_option_last,
   // legacy option names
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index dcbaf15d..91897b9d 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -91,6 +91,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap);
 mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
 mi_heap_t* _mi_heap_main_get(void);   // statically allocated main backing heap
 mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id);
+void _mi_heap_guarded_init(mi_heap_t* heap);

 // os.c
 void _mi_os_init(void);   // called from process init
@@ -641,16 +642,40 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
   page->flags.x.has_aligned = has_aligned;
 }

-#if MI_DEBUG_GUARDED
-static inline bool mi_page_has_guarded(const mi_page_t* page) {
-  return page->flags.x.has_guarded;
+/* -------------------------------------------------------------------
+  Guarded objects
+------------------------------------------------------------------- */
+#if MI_GUARDED
+static inline bool mi_block_ptr_is_guarded(const mi_block_t* block, const void* p) {
+  const ptrdiff_t offset = (uint8_t*)p - (uint8_t*)block;
+  return (offset >= (ptrdiff_t)(sizeof(mi_block_t)) && block->next == MI_BLOCK_TAG_GUARDED);
 }

-static inline void mi_page_set_has_guarded(mi_page_t* page, bool has_guarded) {
-  page->flags.x.has_guarded = has_guarded;
+static inline bool mi_heap_malloc_use_guarded(mi_heap_t* heap, size_t size) {
+  // this code is written to result in fast assembly as it is on the hot path for allocation
+  const size_t count = heap->guarded_sample_count - 1;  // if the rate was 0, this will underflow and count for a long time..
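+  // the sample counter simply counts down on every allocation; a block is only sampled when it reaches zero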
+  if mi_likely(count != 0) {
+    // no sample
+    heap->guarded_sample_count = count;
+    return false;
+  }
+  else if (size >= heap->guarded_size_min && size <= heap->guarded_size_max) {
+    // use guarded allocation
+    heap->guarded_sample_count = heap->guarded_sample_rate;  // reset
+    return (heap->guarded_sample_rate != 0);
+  }
+  else {
+    // failed size criteria, rewind count (but don't write to an empty heap)
+    if (heap->guarded_sample_rate != 0) { heap->guarded_sample_count = 1; }
+    return false;
+  }
+}
+
+mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
+
 #endif

+
 /* -------------------------------------------------------------------
   Encoding/Decoding the free list next pointers
diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h
index 640c966f..f8bf948e 100644
--- a/include/mimalloc/prim.h
+++ b/include/mimalloc/prim.h
@@ -25,6 +25,8 @@ typedef struct mi_os_mem_config_s {
   size_t page_size;            // default to 4KiB
   size_t large_page_size;      // 0 if not supported, usually 2MiB (4MiB on Windows)
   size_t alloc_granularity;    // smallest allocation size (usually 4KiB, on Windows 64KiB)
+  size_t physical_memory;      // physical memory size
+  size_t virtual_address_bits; // usually 48 or 56 bits on 64-bit systems. (used to determine secure randomization)
   bool   has_overcommit;       // can we reserve more memory than can be actually committed?
   bool   has_partial_free;     // can allocated blocks be freed partially? (true for mmap, false for VirtualAlloc)
   bool   has_virtual_reserve;  // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory)
@@ -41,9 +43,10 @@ int _mi_prim_free(void* addr, size_t size );
 // If `commit` is false, the virtual memory range only needs to be reserved (with no access)
 // which will later be committed explicitly using `_mi_prim_commit`.
 // `is_zero` is set to true if the memory was zero initialized (as on most OS's)
+// The `hint_addr` address is either `NULL` or a preferred allocation address but can be ignored.
 // pre: !commit => !allow_large
 //      try_alignment >= _mi_os_page_size() and a power of 2
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr);
+int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr);

 // Commit memory. Returns error code or 0 on success.
 // For example, on Linux this would make the memory PROT_READ|PROT_WRITE.
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index 044d6eae..aa5f9996 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -75,8 +75,8 @@ terms of the MIT license. A copy of the license can be found in the file

 // Use guard pages behind objects of a certain size (set by the MIMALLOC_GUARDED_MIN/MAX options)
 // Padding should be disabled when using guard pages
-// #define MI_DEBUG_GUARDED 1
-#if defined(MI_DEBUG_GUARDED)
+// #define MI_GUARDED 1
+#if defined(MI_GUARDED)
 #define MI_PADDING 0
 #endif

@@ -244,6 +244,13 @@ typedef struct mi_block_s {
   mi_encoded_t next;
 } mi_block_t;

+#if MI_GUARDED
+// we always align guarded pointers in a block at an offset
+// the block `next` field is then used as a tag to distinguish regular offset aligned blocks from guarded ones
+#define MI_BLOCK_TAG_ALIGNED   ((mi_encoded_t)(0))
+#define MI_BLOCK_TAG_GUARDED   (~MI_BLOCK_TAG_ALIGNED)
+#endif
+

 // The delayed flags are used for efficient multi-threaded free-ing
 typedef enum mi_delayed_e {
@@ -262,7 +269,6 @@ typedef union mi_page_flags_s {
   struct {
     uint8_t in_full : 1;
     uint8_t has_aligned : 1;
-    uint8_t has_guarded : 1;  // only used with MI_DEBUG_GUARDED
   } x;
 } mi_page_flags_t;
 #else
@@ -272,7 +278,6 @@ typedef union mi_page_flags_s {
   struct {
     uint8_t in_full;
     uint8_t has_aligned;
-    uint8_t has_guarded;  // only used with MI_DEBUG_GUARDED
   } x;
 } mi_page_flags_t;
 #endif
@@ -556,6 +561,13 @@ struct mi_heap_s {
   mi_heap_t* next;             // list of heaps per thread
   bool no_reclaim;             // `true` if this heap should not reclaim abandoned pages
   uint8_t tag;                 // custom tag, can be used for separating heaps based on the object types
+  #if MI_GUARDED
+  size_t guarded_size_min;     // minimal size for guarded objects
+  size_t guarded_size_max;     // maximal size for guarded objects
+  size_t guarded_sample_rate;  // sample rate (set to 0 to disable guarded pages)
+  size_t guarded_sample_seed;  // starting sample count
+  size_t guarded_sample_count; // current sample count (counting down to 0)
+  #endif
   mi_page_t* pages_free_direct[MI_PAGES_DIRECT];  // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
   mi_page_queue_t pages[MI_BIN_FULL + 1];         // queue of pages for each size class (or "bin")
 };
@@ -649,6 +661,7 @@ typedef struct mi_stats_s {
   mi_stat_counter_t arena_count;
   mi_stat_counter_t arena_crossover_count;
   mi_stat_counter_t arena_rollback_count;
+  mi_stat_counter_t guarded_alloc_count;
 #if MI_STAT>1
   mi_stat_count_t normal_bins[MI_BIN_HUGE+1];
 #endif
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 3d987bdd..9b5a6bd1 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -20,14 +20,36 @@ static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) {
   mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0));
   if (alignment > size) return false;
   if (alignment <= MI_MAX_ALIGN_SIZE) return true;
-  #if MI_DEBUG_GUARDED
-  return false;
-  #else
   const size_t bsize = mi_good_size(size);
   return (bsize <= MI_MAX_ALIGN_GUARANTEE && (bsize & (alignment-1)) == 0);
-  #endif
 }

+#if MI_GUARDED
+static mi_decl_restrict void* mi_heap_malloc_guarded_aligned(mi_heap_t* heap, size_t size, size_t alignment, bool zero) mi_attr_noexcept {
+  // use over allocation for guarded blocks
+  mi_assert_internal(alignment > 0 && alignment < MI_BLOCK_ALIGNMENT_MAX);
+  const size_t oversize = size + alignment - 1;
+  void* base = _mi_heap_malloc_guarded(heap, oversize, zero);
+  void* p = mi_align_up_ptr(base, alignment);
+  mi_track_align(base, p, (uint8_t*)p - (uint8_t*)base, size);
+  mi_assert_internal(mi_usable_size(p) >= size);
+  mi_assert_internal(_mi_is_aligned(p, alignment));
+  return p;
+}
+
+static void* mi_heap_malloc_zero_no_guarded(mi_heap_t* heap, size_t size, bool zero) {
+  const size_t rate = heap->guarded_sample_rate;
+  heap->guarded_sample_rate = 0;
+  void* p = _mi_heap_malloc_zero(heap, size, zero);
+  heap->guarded_sample_rate = rate;
+  return p;
+}
+#else
+static void* mi_heap_malloc_zero_no_guarded(mi_heap_t* heap, size_t size, bool zero) {
+  return _mi_heap_malloc_zero(heap, size, zero);
+}
+#endif
+
 // Fallback aligned allocation that over-allocates -- split out for better codegen
 static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
 {
@@ -48,6 +70,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
       return NULL;
     }
     oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
+    // note: no guarded as alignment > 0
    p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment);  // the page block size should be large enough to align in the single huge page block
     // zero afterwards as only the area from the aligned_p may be committed!
     if (p == NULL) return NULL;
@@ -55,11 +78,11 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
   else {
     // otherwise over-allocate
     oversize = size + alignment - 1;
-    p = _mi_heap_malloc_zero(heap, oversize, zero);
+    p = mi_heap_malloc_zero_no_guarded(heap, oversize, zero);
     if (p == NULL) return NULL;
   }
   mi_page_t* page = _mi_ptr_page(p);
- 
+
   // .. and align within the allocation
   const uintptr_t align_mask = alignment - 1;  // for any x, `(x & align_mask) == (x % alignment)`
   const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask;
@@ -68,6 +91,13 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
   void* aligned_p = (void*)((uintptr_t)p + adjust);
   if (aligned_p != p) {
     mi_page_set_has_aligned(page, true);
+    #if MI_GUARDED
+    // set tag to aligned so mi_usable_size works with guard pages
+    if (adjust >= sizeof(mi_block_t)) {
+      mi_block_t* const block = (mi_block_t*)p;
+      block->next = MI_BLOCK_TAG_ALIGNED;
+    }
+    #endif
     _mi_padding_shrink(page, (mi_block_t*)p, adjust + size);
   }
   // todo: expand padding if overallocated ?
@@ -76,8 +106,10 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
   mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0);
   mi_assert_internal(mi_usable_size(aligned_p)>=size);
   mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust);
-  #if !MI_DEBUG_GUARDED
-  mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_page(aligned_p), aligned_p));
+  #if MI_DEBUG > 1
+  mi_page_t* const apage = _mi_ptr_page(aligned_p);
+  void* unalign_p = _mi_page_ptr_unalign(apage, aligned_p);
+  mi_assert_internal(p == unalign_p);
   #endif

   // now zero the block if needed
@@ -91,6 +123,9 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
   if (p != aligned_p) {
     mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p));
+    #if MI_GUARDED
+    mi_track_mem_defined(p, sizeof(mi_block_t));
+    #endif
   }
   return aligned_p;
 }
@@ -100,27 +135,27 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_generic(mi_heap_t*
 {
   mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
   // we don't allocate more than MI_MAX_ALLOC_SIZE (see )
-  if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) { 
+  if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) {
     #if MI_DEBUG > 0
     _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
     #endif
     return NULL;
   }
- 
+
   // use regular allocation if it is guaranteed to fit the alignment constraints.
   // this is important to try as the fast path in `mi_heap_malloc_zero_aligned` only works when there exist
   // a page with the right block size, and if we always use the over-alloc fallback that would never happen.
   if (offset == 0 && mi_malloc_is_naturally_aligned(size,alignment)) {
-    void* p = _mi_heap_malloc_zero(heap, size, zero);
+    void* p = mi_heap_malloc_zero_no_guarded(heap, size, zero);
     mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
-    const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0; 
+    const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0;
     if mi_likely(is_aligned_or_null) {
       return p;
     }
     else {
       // this should never happen if the `mi_malloc_is_naturally_aligned` check is correct..
       mi_assert(false);
-      mi_free(p); 
+      mi_free(p);
     }
   }
@@ -128,6 +163,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_generic(mi_heap_t*
   return mi_heap_malloc_zero_aligned_at_overalloc(heap,size,alignment,offset,zero);
 }

+
 // Primitive aligned allocation
 static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
 {
@@ -138,12 +174,17 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
     #endif
     return NULL;
   }
- 
-  #if !MI_DEBUG_GUARDED
+
+  #if MI_GUARDED
+  if (offset==0 && alignment < MI_BLOCK_ALIGNMENT_MAX && mi_heap_malloc_use_guarded(heap,size)) {
+    return mi_heap_malloc_guarded_aligned(heap, size, alignment, zero);
+  }
+  #endif
+
   // try first if there happens to be a small block available with just the right alignment
   if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) {
     const uintptr_t align_mask = alignment-1;  // for any x, `(x & align_mask) == (x % alignment)`
-    const size_t padsize = size + MI_PADDING_SIZE; 
+    const size_t padsize = size + MI_PADDING_SIZE;
     mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize);
     if mi_likely(page->free != NULL) {
       const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0;
@@ -160,7 +201,6 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
       }
     }
   }
-  #endif

   // fallback to generic aligned allocation
   return mi_heap_malloc_zero_aligned_at_generic(heap, size, alignment, offset, zero);
@@ -318,3 +358,5 @@ mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t
 mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
   return mi_heap_recalloc_aligned(mi_prim_get_default_heap(), p, newcount, size, alignment);
 }
+
+
diff --git a/src/alloc.c b/src/alloc.c
index 70767e5b..ffa7b8b7 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -31,22 +31,22 @@ terms of the MIT license. A copy of the license can be found in the file
 extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
 {
   mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size);
- 
+
   // check the free list
   mi_block_t* const block = page->free;
   if mi_unlikely(block == NULL) {
     return _mi_malloc_generic(heap, size, zero, 0);
   }
   mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
- 
+
   // pop from the free list
   page->free = mi_block_next(page, block);
   page->used++;
   mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
   mi_assert_internal(page->block_size < MI_MAX_ALIGN_SIZE || _mi_is_aligned(block, MI_MAX_ALIGN_SIZE));
- 
+
   #if MI_DEBUG>3
-  if (page->free_is_zero && size > sizeof(*block)) { 
+  if (page->free_is_zero && size > sizeof(*block)) {
     mi_assert_expensive(mi_mem_is_zero(block+1,size - sizeof(*block)));
   }
   #endif
@@ -121,10 +121,8 @@ extern void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t siz
   return _mi_page_malloc_zero(heap,page,size,true);
 }

-#if MI_DEBUG_GUARDED
-static mi_decl_restrict void* mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
-static inline bool mi_heap_malloc_use_guarded(size_t size, bool has_huge_alignment);
-static inline bool mi_heap_malloc_small_use_guarded(size_t size);
+#if MI_GUARDED
+mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
 #endif

 static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
@@ -134,11 +132,13 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
   const uintptr_t tid = _mi_thread_id();
   mi_assert(heap->thread_id == 0 || heap->thread_id == tid);  // heaps are thread local
   #endif
-  #if (MI_PADDING || MI_DEBUG_GUARDED)
+  #if (MI_PADDING || MI_GUARDED)
   if (size == 0) { size = sizeof(void*); }
   #endif
-  #if MI_DEBUG_GUARDED
-  if (mi_heap_malloc_small_use_guarded(size)) { return mi_heap_malloc_guarded(heap, size, zero); }
+  #if MI_GUARDED
+  if (mi_heap_malloc_use_guarded(heap,size)) {
+    return _mi_heap_malloc_guarded(heap, size, zero);
+  }
   #endif

   // get page in constant time, and allocate from it
@@ -171,13 +171,15 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t si

 // The main allocation function
 extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept {
-  // fast path for small objects 
+  // fast path for small objects
   if mi_likely(size <= MI_SMALL_SIZE_MAX) {
     mi_assert_internal(huge_alignment == 0);
     return mi_heap_malloc_small_zero(heap, size, zero);
   }
-  #if MI_DEBUG_GUARDED
-  else if (mi_heap_malloc_use_guarded(size,huge_alignment>0)) { return mi_heap_malloc_guarded(heap, size, zero); }
+  #if MI_GUARDED
+  else if (huge_alignment==0 && mi_heap_malloc_use_guarded(heap,size)) {
+    return _mi_heap_malloc_guarded(heap, size, zero);
+  }
   #endif
   else {
     // regular allocation
@@ -185,7 +187,7 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z
     mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id());  // heaps are thread local
     void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment);  // note: size can overflow but it is detected in malloc_generic
     mi_track_malloc(p,size,zero);
- 
+
     #if MI_STAT>1
     if (p != NULL) {
       if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); }
@@ -601,69 +603,73 @@ mi_decl_nodiscard void* mi_new_reallocn(void* p, size_t newcount, size_t size) {
   }
 }

-#if MI_DEBUG_GUARDED
-static inline bool mi_heap_malloc_small_use_guarded(size_t size) {
-  return (size <= (size_t)_mi_option_get_fast(mi_option_debug_guarded_max)
-          && size >= (size_t)_mi_option_get_fast(mi_option_debug_guarded_min));
+#if MI_GUARDED
+// We always allocate a guarded block at an offset (`mi_page_has_aligned` will be true).
+// We then set the first word of the block to `0` for regular offset aligned allocations (in `alloc-aligned.c`)
+// and the first word to `~0` for guarded allocations to have a correct `mi_usable_size`
+
+static void* mi_block_ptr_set_guarded(mi_block_t* block, size_t obj_size) {
+  // TODO: we can still make padding work by moving it out of the guard page area
+  mi_page_t* const page = _mi_ptr_page(block);
+  mi_page_set_has_aligned(page, true);
+  block->next = MI_BLOCK_TAG_GUARDED;
+
+  // set guard page at the end of the block
+  mi_segment_t* const segment = _mi_page_segment(page);
+  const size_t block_size = mi_page_block_size(page);  // must use `block_size` to match `mi_free_local`
+  const size_t os_page_size = _mi_os_page_size();
+  mi_assert_internal(block_size >= obj_size + os_page_size + sizeof(mi_block_t));
+  if (block_size < obj_size + os_page_size + sizeof(mi_block_t)) {
+    // should never happen
+    mi_free(block);
+    return NULL;
+  }
+  uint8_t* guard_page = (uint8_t*)block + block_size - os_page_size;
+  mi_assert_internal(_mi_is_aligned(guard_page, os_page_size));
+  if (segment->allow_decommit && _mi_is_aligned(guard_page, os_page_size)) {
+    _mi_os_protect(guard_page, os_page_size);
+  }
+  else {
+    _mi_warning_message("unable to set a guard page behind an object due to pinned memory (large OS pages?) (object %p of size %zu)\n", block, block_size);
+  }
+
+  // align pointer just in front of the guard page
+  size_t offset = block_size - os_page_size - obj_size;
+  mi_assert_internal(offset > sizeof(mi_block_t));
+  if (offset > MI_BLOCK_ALIGNMENT_MAX) {
+    // give up on placing it right in front of the guard page if the offset is too large for unalignment
+    offset = MI_BLOCK_ALIGNMENT_MAX;
+  }
+  void* p = (uint8_t*)block + offset;
+  mi_track_align(block, p, offset, obj_size);
+  mi_track_mem_defined(block, sizeof(mi_block_t));
+  return p;
 }

-static inline bool mi_heap_malloc_use_guarded(size_t size, bool has_huge_alignment) {
-  return (!has_huge_alignment                // guarded pages do not work with huge aligments at the moment
-          && _mi_option_get_fast(mi_option_debug_guarded_max) > 0  // guarded must be enabled
-          && (mi_heap_malloc_small_use_guarded(size)
-              || ((mi_good_size(size) & (_mi_os_page_size() - 1)) == 0))  // page-size multiple are always guarded so we can have a correct `mi_usable_size`.
-         );
-}
-
-static mi_decl_restrict void* mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept
+mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept
 {
   #if defined(MI_PADDING_SIZE)
   mi_assert(MI_PADDING_SIZE==0);
   #endif
   // allocate multiple of page size ending in a guard page
-  const size_t obj_size = _mi_align_up(size, MI_MAX_ALIGN_SIZE);  // ensure minimal alignment requirement
+  // ensure minimal alignment requirement?
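+  // block layout: [ tag word | .. | object | guard page ]
+  // (`bsize` below reserves room for the object plus the tag word; `req_size` adds one OS page that becomes the guard page)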
   const size_t os_page_size = _mi_os_page_size();
-  const size_t req_size = _mi_align_up(obj_size + os_page_size, os_page_size);
-  void* const block = _mi_malloc_generic(heap, req_size, zero, 0 /* huge_alignment */);
+  const size_t obj_size  = (mi_option_is_enabled(mi_option_guarded_precise) ? size : _mi_align_up(size, MI_MAX_ALIGN_SIZE));
+  const size_t bsize     = _mi_align_up(_mi_align_up(obj_size, MI_MAX_ALIGN_SIZE) + sizeof(mi_block_t), MI_MAX_ALIGN_SIZE);
+  const size_t req_size  = _mi_align_up(bsize + os_page_size, os_page_size);
+  mi_block_t* const block = (mi_block_t*)_mi_malloc_generic(heap, req_size, zero, 0 /* huge_alignment */);
   if (block==NULL) return NULL;
-  mi_page_t* page = _mi_ptr_page(block);
-  mi_segment_t* segment = _mi_page_segment(page);
-
-  const size_t block_size = mi_page_block_size(page);  // must use `block_size` to match `mi_free_local`
-  void* const guard_page = (uint8_t*)block + (block_size - os_page_size);
-  mi_assert_internal(_mi_is_aligned(guard_page, os_page_size));
-
-  // place block in front of the guard page
-  size_t offset = block_size - os_page_size - obj_size;
-  if (offset > MI_BLOCK_ALIGNMENT_MAX) {
-    // give up to place it right in front of the guard page if the offset is too large for unalignment
-    offset = MI_BLOCK_ALIGNMENT_MAX;
-  }
-  void* const p = (uint8_t*)block + offset;
-  mi_assert_internal(p>=block);
-
-  // set page flags
-  if (offset > 0) {
-    mi_page_set_has_aligned(page, true);
-  }
-
-  // set guard page
-  if (segment->allow_decommit) {
-    mi_page_set_has_guarded(page, true);
-    _mi_os_protect(guard_page, os_page_size);
-  }
-  else {
-    _mi_warning_message("unable to set a guard page behind an object due to pinned memory (large OS pages?) (object %p of size %zu)\n", p, size);
-  }
+  void* const p = mi_block_ptr_set_guarded(block, obj_size);

   // stats
-  mi_track_malloc(p, size, zero);
-  #if MI_STAT>1
+  mi_track_malloc(p, size, zero);
   if (p != NULL) {
     if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); }
+    #if MI_STAT>1
     mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
+    #endif
+    _mi_stat_counter_increase(&heap->tld->stats.guarded_alloc_count, 1);
   }
-  #endif
   #if MI_DEBUG>3
   if (p != NULL && zero) {
     mi_assert_expensive(mi_mem_is_zero(p, size));
diff --git a/src/arena.c b/src/arena.c
index d2039623..686500b4 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -293,7 +293,7 @@ static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_no
     bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld )
 {
   MI_UNUSED_RELEASE(alignment);
-  mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
+  mi_assert(alignment <= MI_SEGMENT_ALIGN);
   const size_t bcount = mi_block_count_of_size(size);
   const size_t arena_index = mi_arena_id_index(arena_id);
   mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count));
diff --git a/src/free.c b/src/free.c
index ad162915..f856da77 100644
--- a/src/free.c
+++ b/src/free.c
@@ -34,7 +34,7 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
   if mi_unlikely(mi_check_is_double_free(page, block)) return;
   mi_check_padding(page, block);
   if (track_stats) { mi_stat_free(page, block); }
-  #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN && !MI_DEBUG_GUARDED
+  #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN && !MI_GUARDED
   if (!mi_page_is_huge(page)) {  // huge page content may be already decommitted
     memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
   }
@@ -71,21 +71,30 @@ mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) {
   return (mi_block_t*)((uintptr_t)p - adjust);
 }

-// forward declaration for a MI_DEBUG_GUARDED build
-static void mi_block_unguard(mi_page_t* page, mi_block_t* block);
+// forward declaration for a MI_GUARDED build
+#if MI_GUARDED
+static void mi_block_unguard(mi_page_t* page, mi_block_t* block, void* p);  // forward declaration
+static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, void* p) {
+  if (mi_block_ptr_is_guarded(block, p)) { mi_block_unguard(page, block, p); }
+}
+#else
+static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, void* p) {
+  MI_UNUSED(page); MI_UNUSED(block); MI_UNUSED(p);
+}
+#endif

 // free a local pointer (page parameter comes first for better codegen)
 static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
   MI_UNUSED(segment);
   mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(page, p) : (mi_block_t*)p);
-  mi_block_unguard(page,block);
+  mi_block_check_unguard(page, block, p);
   mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */);
 }

 // free a pointer owned by another thread (page parameter comes first for better codegen)
 static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
   mi_block_t* const block = _mi_page_ptr_unalign(page, p);  // don't check `has_aligned` flag to avoid a race (issue #865)
-  mi_block_unguard(page, block);
+  mi_block_check_unguard(page, block, p);
   mi_free_block_mt(page, segment, block);
 }

@@ -102,17 +111,17 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms
 {
   MI_UNUSED(msg);

-#if (MI_DEBUG>0)
-  if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) {
+  #if (MI_DEBUG>0)
+  if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0 && !mi_option_is_enabled(mi_option_guarded_precise)) {
     _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
     return NULL;
   }
-#endif
+  #endif

   mi_segment_t* const segment = _mi_ptr_segment(p);
   if mi_unlikely(segment==NULL) return segment;

-#if (MI_DEBUG>0)
+  #if (MI_DEBUG>0)
   if mi_unlikely(!mi_is_in_heap_region(p)) {
   #if (MI_INTPTR_SIZE == 8 && defined(__linux__))
     if (((uintptr_t)p >> 40) != 0x7F) {  // linux tends to align large blocks above 0x7F000000000 (issue #640)
@@ -126,13 +135,13 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms
       }
     }
   }
-#endif
-#if (MI_DEBUG>0 || MI_SECURE>=4)
+  #endif
+  #if (MI_DEBUG>0 || MI_SECURE>=4)
   if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) {
     _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
     return NULL;
   }
-#endif
+  #endif
   return segment;
 }

@@ -305,20 +314,19 @@ static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* p
   const size_t size = mi_page_usable_size_of(page, block);
   const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block;
   mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
-  return (size - adjust);
+  const size_t aligned_size = (size - adjust);
+  #if MI_GUARDED
+  if (mi_block_ptr_is_guarded(block, p)) {
+    return aligned_size - _mi_os_page_size();
+  }
+  #endif
+  return aligned_size;
 }

 static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
   const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
   if mi_unlikely(segment==NULL) return 0;
   const mi_page_t* const page = _mi_segment_page_of(segment, p);
-  #if MI_DEBUG_GUARDED
-  if (mi_page_has_guarded(page)) {
-    const size_t bsize = mi_page_usable_aligned_size_of(page, p);
-    mi_assert_internal(bsize > _mi_os_page_size());
-    return (bsize > _mi_os_page_size() ? bsize - _mi_os_page_size() : bsize);
-  } else
-  #endif
   if mi_likely(!mi_page_has_aligned(page)) {
     const mi_block_t* block = (const mi_block_t*)p;
     return mi_page_usable_size_of(page, block);
@@ -543,23 +551,21 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
 #endif

-// Remove guard page when building with MI_DEBUG_GUARDED
-#if !MI_DEBUG_GUARDED
-static void mi_block_unguard(mi_page_t* page, mi_block_t* block) {
-  MI_UNUSED(page);
-  MI_UNUSED(block);
-  // do nothing
-}
-#else
-static void mi_block_unguard(mi_page_t* page, mi_block_t* block) {
-  if (mi_page_has_guarded(page)) {
-    const size_t bsize = mi_page_block_size(page);
-    const size_t psize = _mi_os_page_size();
-    mi_assert_internal(bsize > psize);
-    mi_assert_internal(_mi_page_segment(page)->allow_decommit);
-    void* gpage = (uint8_t*)block + (bsize - psize);
-    mi_assert_internal(_mi_is_aligned(gpage, psize));
-    _mi_os_unprotect(gpage, psize);
-  }
+// Remove guard page when building with MI_GUARDED
+#if MI_GUARDED
+static void mi_block_unguard(mi_page_t* page, mi_block_t* block, void* p) {
+  MI_UNUSED(p);
+  mi_assert_internal(mi_block_ptr_is_guarded(block, p));
+  mi_assert_internal(mi_page_has_aligned(page));
+  mi_assert_internal((uint8_t*)p - (uint8_t*)block >= (ptrdiff_t)sizeof(mi_block_t));
+  mi_assert_internal(block->next == MI_BLOCK_TAG_GUARDED);
+
+  const size_t bsize = mi_page_block_size(page);
+  const size_t psize = _mi_os_page_size();
+  mi_assert_internal(bsize > psize);
+  mi_assert_internal(_mi_page_segment(page)->allow_decommit);
+  void* gpage = (uint8_t*)block + bsize - psize;
+  mi_assert_internal(_mi_is_aligned(gpage, psize));
+  _mi_os_unprotect(gpage, psize);
 }
 #endif
diff --git a/src/heap.c b/src/heap.c
index b3fda0f6..154d4b80 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -228,6 +228,7 @@ void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool
   heap->cookie = _mi_heap_random_next(heap) | 1;
   heap->keys[0] = _mi_heap_random_next(heap);
   heap->keys[1] = _mi_heap_random_next(heap);
+  _mi_heap_guarded_init(heap);
   // push on the thread local heaps list
   heap->next = heap->tld->heaps;
   heap->tld->heaps = heap;
@@ -381,8 +382,8 @@ void mi_heap_destroy(mi_heap_t* heap) {
   mi_assert(heap->no_reclaim);
   mi_assert_expensive(mi_heap_is_valid(heap));
   if (heap==NULL || !mi_heap_is_initialized(heap)) return;
-  #if MI_DEBUG_GUARDED
-  _mi_warning_message("'mi_heap_destroy' called but ignored as MI_DEBUG_GUARDED is enabled (heap at %p)\n", heap);
+  #if MI_GUARDED
+  // _mi_warning_message("'mi_heap_destroy' called but MI_GUARDED is enabled -- using `mi_heap_delete` instead (heap at %p)\n", heap);
   mi_heap_delete(heap);
   return;
   #else
diff --git a/src/init.c b/src/init.c
index a13edba6..ccaf9445 100644
--- a/src/init.c
+++ b/src/init.c
@@ -88,7 +88,7 @@ const mi_page_t _mi_page_empty = {
   { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
   { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
   { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \
-  { 0, 0 } \
+  { 0, 0 }, { 0, 0 } \
   MI_STAT_COUNT_END_NULL()

@@ -125,6 +125,9 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
   NULL,            // next
   false,           // can reclaim
   0,               // tag
+  #if MI_GUARDED
+  0, 0, 0, 0, 1,   // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`)
+  #endif
   MI_SMALL_PAGES_EMPTY,
   MI_PAGE_QUEUES_EMPTY
 };
@@ -173,6 +176,9 @@ mi_decl_cache_align mi_heap_t _mi_heap_main = {
   NULL,            // next heap
   false,           // can reclaim
   0,               // tag
+  #if MI_GUARDED
+  0, 0, 0, 0, 0,
+  #endif
   MI_SMALL_PAGES_EMPTY,
   MI_PAGE_QUEUES_EMPTY
 };

@@ -181,6 +187,45 @@ bool _mi_process_is_initialized = false;  // set to `true` in `mi_process_init`.

 mi_stats_t _mi_stats_main = { MI_STATS_NULL };

+#if MI_GUARDED
+mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) {
+  heap->guarded_sample_seed = seed;
+  if (heap->guarded_sample_seed == 0) {
+    heap->guarded_sample_seed = _mi_heap_random_next(heap);
+  }
+  heap->guarded_sample_rate = sample_rate;
+  if (heap->guarded_sample_rate >= 1) {
+    heap->guarded_sample_seed = heap->guarded_sample_seed % heap->guarded_sample_rate;
+  }
+  heap->guarded_sample_count = heap->guarded_sample_seed;  // count down samples
+}
+
+mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max) {
+  heap->guarded_size_min = min;
+  heap->guarded_size_max = (min > max ? min : max);
+}
+
+void _mi_heap_guarded_init(mi_heap_t* heap) {
+  mi_heap_guarded_set_sample_rate(heap,
+    (size_t)mi_option_get_clamp(mi_option_guarded_sample_rate, 0, LONG_MAX),
+    (size_t)mi_option_get(mi_option_guarded_sample_seed));
+  mi_heap_guarded_set_size_bound(heap,
+    (size_t)mi_option_get_clamp(mi_option_guarded_min, 0, LONG_MAX),
+    (size_t)mi_option_get_clamp(mi_option_guarded_max, 0, LONG_MAX) );
+}
+#else
+mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) {
+  MI_UNUSED(heap); MI_UNUSED(sample_rate); MI_UNUSED(seed);
+}
+
+mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max) {
+  MI_UNUSED(heap); MI_UNUSED(min); MI_UNUSED(max);
+}
+void _mi_heap_guarded_init(mi_heap_t* heap) {
+  MI_UNUSED(heap);
+}
+#endif
+

 static void mi_heap_main_init(void) {
   if (_mi_heap_main.cookie == 0) {
@@ -196,6 +241,7 @@ static void mi_heap_main_init(void) {
     _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
     mi_lock_init(&mi_subproc_default.abandoned_os_lock);
     mi_lock_init(&mi_subproc_default.abandoned_os_visit_lock);
+    _mi_heap_guarded_init(&_mi_heap_main);
   }
 }

@@ -577,7 +623,7 @@ static void mi_detect_cpu_features(void) {
 }
 #else
 static void mi_detect_cpu_features(void) {
-  // nothing 
+  // nothing
 }
 #endif
diff --git a/src/options.c b/src/options.c
index ed1cf921..2a816096 100644
--- a/src/options.c
+++ b/src/options.c
@@ -47,7 +47,9 @@ typedef struct mi_option_desc_s {
 #define MI_OPTION(opt)                mi_option_##opt, #opt, NULL
 #define MI_OPTION_LEGACY(opt,legacy)  mi_option_##opt, #opt, #legacy

-// Some options can be set at build time for statically linked libraries (use `-DMI_EXTRA_CPPDEFS="opt1=val1;opt2=val2"`)
+// Some options can be set at build time for statically linked libraries
+// (use `-DMI_EXTRA_CPPDEFS="opt1=val1;opt2=val2"`)
+//
 // This is useful if we cannot pass them as environment variables
 // (and setting them programmatically would be too late)

@@ -100,14 +102,19 @@ static mi_option_desc_t options[_mi_option_last] =
   { 0, UNINIT, MI_OPTION(show_stats) },
   { MI_DEFAULT_VERBOSE, UNINIT, MI_OPTION(verbose) },

-  // the following options are experimental and not all combinations make sense.
-  { MI_DEFAULT_EAGER_COMMIT, UNINIT, MI_OPTION(eager_commit) },  // commit per segment directly (4MiB) (but see also `eager_commit_delay`)
-  { MI_DEFAULT_ARENA_EAGER_COMMIT, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) },  // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. linux)
+  // some of the following options are experimental and not all combinations are allowed.
+  { MI_DEFAULT_EAGER_COMMIT,
+       UNINIT, MI_OPTION(eager_commit) },  // commit per segment directly (4MiB) (but see also `eager_commit_delay`)
+  { MI_DEFAULT_ARENA_EAGER_COMMIT,
+       UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) },  // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. linux)
   { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) },  // purge decommits memory (instead of reset) (note: on linux this uses MADV_DONTNEED for decommit)
-  { MI_DEFAULT_ALLOW_LARGE_OS_PAGES, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) },  // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
-  { MI_DEFAULT_RESERVE_HUGE_OS_PAGES, UNINIT, MI_OPTION(reserve_huge_os_pages) },  // per 1GiB huge pages
+  { MI_DEFAULT_ALLOW_LARGE_OS_PAGES,
+       UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) },  // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
+  { MI_DEFAULT_RESERVE_HUGE_OS_PAGES,
+       UNINIT, MI_OPTION(reserve_huge_os_pages) },  // per 1GiB huge pages
   {-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) },  // reserve huge pages at node N
-  { MI_DEFAULT_RESERVE_OS_MEMORY, UNINIT, MI_OPTION(reserve_os_memory) },  // reserve N KiB OS memory in advance (use `option_get_size`)
+  { MI_DEFAULT_RESERVE_OS_MEMORY,
+       UNINIT, MI_OPTION(reserve_os_memory) },  // reserve N KiB OS memory in advance (use `option_get_size`)
   { 0, UNINIT, MI_OPTION(deprecated_segment_cache) },  // cache N segments per thread
   { 0, UNINIT, MI_OPTION(deprecated_page_reset) },  // reset page memory on free
   { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge,abandoned_page_reset) },  // reset free page memory when a thread terminates
@@ -125,19 +132,26 @@ static mi_option_desc_t options[_mi_option_last] =
   { 32, UNINIT, MI_OPTION(max_warnings) },  // maximum warnings that are output
   { 10, UNINIT, MI_OPTION(max_segment_reclaim)},  // max. percentage of the abandoned segments to be reclaimed per try.
   { 0, UNINIT, MI_OPTION(destroy_on_exit)},  // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
-  { MI_DEFAULT_ARENA_RESERVE, UNINIT, MI_OPTION(arena_reserve) },  // reserve memory N KiB at a time (=1GiB) (use `option_get_size`) 
+  { MI_DEFAULT_ARENA_RESERVE, UNINIT, MI_OPTION(arena_reserve) },  // reserve memory N KiB at a time (=1GiB) (use `option_get_size`)
   { 10,  UNINIT, MI_OPTION(arena_purge_mult) },  // purge delay multiplier for arena's
   { 1,   UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
   { 1,   UNINIT, MI_OPTION(abandoned_reclaim_on_free) },  // reclaim an abandoned segment on a free
-  { MI_DEFAULT_DISALLOW_ARENA_ALLOC, UNINIT, MI_OPTION(disallow_arena_alloc) },  // 1 = do not use arena's for allocation (except if using specific arena id's) 
+  { MI_DEFAULT_DISALLOW_ARENA_ALLOC, UNINIT, MI_OPTION(disallow_arena_alloc) },  // 1 = do not use arena's for allocation (except if using specific arena id's)
   { 400, UNINIT, MI_OPTION(retry_on_oom) },  // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries.
-#if defined(MI_VISIT_ABANDONED) 
+#if defined(MI_VISIT_ABANDONED)
   { 1, INITIALIZED, MI_OPTION(visit_abandoned) },  // allow visiting heap blocks in abandoned segments; requires taking locks during reclaim.
 #else
-  { 0, UNINIT, MI_OPTION(visit_abandoned) }, 
+  { 0, UNINIT, MI_OPTION(visit_abandoned) },
 #endif
-  { 0, UNINIT, MI_OPTION(debug_guarded_min) },  // only used when building with MI_DEBUG_GUARDED: minimal rounded object size for guarded objects
-  { 0, UNINIT, MI_OPTION(debug_guarded_max) },  // only used when building with MI_DEBUG_GUARDED: maximal rounded object size for guarded objects
+  { 0, UNINIT, MI_OPTION(guarded_min) },  // only used when building with MI_GUARDED: minimal rounded object size for guarded objects
+  { MI_GiB, UNINIT, MI_OPTION(guarded_max) },  // only used when building with MI_GUARDED: maximal rounded object size for guarded objects
+  { 0, UNINIT, MI_OPTION(guarded_precise) },  // disregard minimal alignment requirement to always place guarded blocks exactly in front of a guard page (=0)
+#if MI_GUARDED
+  { 4000, UNINIT, MI_OPTION(guarded_sample_rate)},  // 1 out of N allocations in the min/max range will be guarded (=4000)
+#else
+  { 0, UNINIT, MI_OPTION(guarded_sample_rate)},
+#endif
+  { 0, UNINIT, MI_OPTION(guarded_sample_seed)},
   { 0, UNINIT, MI_OPTION(target_segments_per_thread) },  // abandon segments beyond this point, or 0 to disable.
 };

@@ -161,25 +175,25 @@ void _mi_options_init(void) {
   }
   mi_max_error_count = mi_option_get(mi_option_max_errors);
   mi_max_warning_count = mi_option_get(mi_option_max_warnings);
-  #if MI_DEBUG_GUARDED
-  if (mi_option_get(mi_option_debug_guarded_max) > 0) {
+  #if MI_GUARDED
+  if (mi_option_get(mi_option_guarded_sample_rate) > 0) {
     if (mi_option_is_enabled(mi_option_allow_large_os_pages)) {
       mi_option_disable(mi_option_allow_large_os_pages);
       _mi_warning_message("option 'allow_large_os_pages' is disabled to allow for guarded objects\n");
     }
   }
-  _mi_verbose_message("guarded build: %s\n", mi_option_get(mi_option_debug_guarded_max) > 0 ? "enabled" : "disabled");
+  _mi_verbose_message("guarded build: %s\n", mi_option_get(mi_option_guarded_sample_rate) > 0 ? "enabled" : "disabled");
   #endif
 }

 long _mi_option_get_fast(mi_option_t option) {
   mi_assert(option >= 0 && option < _mi_option_last);
-  mi_option_desc_t* desc = &options[option]; 
+  mi_option_desc_t* desc = &options[option];
   mi_assert(desc->option == option);  // index should match the option
   //mi_assert(desc->init != UNINIT);
   return desc->value;
 }
- 
+

 mi_decl_nodiscard long mi_option_get(mi_option_t option) {
   mi_assert(option >= 0 && option < _mi_option_last);
@@ -214,11 +228,11 @@ void mi_option_set(mi_option_t option, long value) {
   desc->value = value;
   desc->init = INITIALIZED;
   // ensure min/max range; be careful to not recurse.
-  if (desc->option == mi_option_debug_guarded_min && _mi_option_get_fast(mi_option_debug_guarded_max) < value) {
-    mi_option_set(mi_option_debug_guarded_max, value);
+  if (desc->option == mi_option_guarded_min && _mi_option_get_fast(mi_option_guarded_max) < value) {
+    mi_option_set(mi_option_guarded_max, value);
   }
-  else if (desc->option == mi_option_debug_guarded_max && _mi_option_get_fast(mi_option_debug_guarded_min) > value) {
-    mi_option_set(mi_option_debug_guarded_min, value);
+  else if (desc->option == mi_option_guarded_max && _mi_option_get_fast(mi_option_guarded_min) > value) {
+    mi_option_set(mi_option_guarded_min, value);
   }
 }

@@ -554,7 +568,7 @@ static void mi_option_init(mi_option_desc_t* desc) {
       char* end = buf;
       long value = strtol(buf, &end, 10);
       if (mi_option_has_size_in_kib(desc->option)) {
-        // this option is interpreted in KiB to prevent overflow of `long` for large allocations 
+        // this option is interpreted in KiB to prevent overflow of `long` for large allocations
         // (long is 32-bit on 64-bit windows, which allows for 4TiB max.)
         size_t size = (value < 0 ? 0 : (size_t)value);
         bool overflow = false;
@@ -569,7 +583,7 @@ static void mi_option_init(mi_option_desc_t* desc) {
         value = (size > LONG_MAX ? LONG_MAX : (long)size);
       }
       if (*end == 0) {
-        mi_option_set(desc->option, value); 
+        mi_option_set(desc->option, value);
       }
       else {
         // set `init` first to avoid recursion through _mi_warning_message on mimalloc_verbose.
diff --git a/src/os.c b/src/os.c
index b794b4da..967f5663 100644
--- a/src/os.c
+++ b/src/os.c
@@ -11,16 +11,33 @@ terms of the MIT license. A copy of the license can be found in the file

 /* -----------------------------------------------------------
-  Initialization. 
+  Initialization.
----------------------------------------------------------- */
+#ifndef MI_DEFAULT_VIRTUAL_ADDRESS_BITS
+#if MI_INTPTR_SIZE < 8
+#define MI_DEFAULT_VIRTUAL_ADDRESS_BITS 32
+#else
+#define MI_DEFAULT_VIRTUAL_ADDRESS_BITS 48
+#endif
+#endif
+
+#ifndef MI_DEFAULT_PHYSICAL_MEMORY
+#if MI_INTPTR_SIZE < 8
+#define MI_DEFAULT_PHYSICAL_MEMORY 4*MI_GiB
+#else
+#define MI_DEFAULT_PHYSICAL_MEMORY 32*MI_GiB
+#endif
+#endif

 static mi_os_mem_config_t mi_os_mem_config = {
-  4096,   // page size
-  0,      // large page size (usually 2MiB)
-  4096,   // allocation granularity
-  true,   // has overcommit? (if true we use MAP_NORESERVE on mmap systems)
-  false,  // can we partially free allocated blocks? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
-  true    // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
+  4096,   // page size
+  0,      // large page size (usually 2MiB)
+  4096,   // allocation granularity
+  MI_DEFAULT_PHYSICAL_MEMORY,
+  MI_DEFAULT_VIRTUAL_ADDRESS_BITS,
+  true,   // has overcommit? (if true we use MAP_NORESERVE on mmap systems)
+  false,  // can we partially free allocated blocks? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
+  true    // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
 };

 bool _mi_os_has_overcommit(void) {
@@ -76,9 +93,9 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats
   aligned hinting
-------------------------------------------------------------- */

-// On 64-bit systems, we can do efficient aligned allocation by using
-// the 2TiB to 30TiB area to allocate those. We assume we have
-// at least 48 bits of virtual address space on 64-bit systems (but see issue #939)
+// On systems with enough virtual address bits, we can do efficient aligned allocation by using
+// the 2TiB to 30TiB area to allocate those. If we have at least 46 bits of virtual address
+// space (64TiB) we use this technique. (but see issue #939)
 #if (MI_INTPTR_SIZE >= 8) && !defined(MI_NO_ALIGNED_HINT)
 static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;

@@ -96,6 +113,7 @@ static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;
 void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
   if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
+  if (mi_os_mem_config.virtual_address_bits < 46) return NULL;  // < 64TiB virtual address space
   size = _mi_align_up(size, MI_SEGMENT_SIZE);
   if (size > 1*MI_GiB) return NULL;  // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
   #if (MI_SECURE>0)
@@ -181,7 +199,8 @@ void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats) {
 -------------------------------------------------------------- */

 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) {
+// Also `hint_addr` is a hint and may be ignored.
+static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) {
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   mi_assert_internal(is_zero != NULL);
   mi_assert_internal(is_large != NULL);
@@ -190,9 +209,9 @@ static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bo
   if (try_alignment == 0) { try_alignment = 1; }  // avoid 0 to ensure there will be no divide by zero when aligning
   *is_zero = false;
   void* p = NULL;
-  int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p);
+  int err = _mi_prim_alloc(hint_addr, size, try_alignment, commit, allow_large, is_large, is_zero, &p);
   if (err != 0) {
-    _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large);
+    _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), addr: %p, size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, hint_addr, size, try_alignment, commit, allow_large);
   }

   MI_UNUSED(tld_stats);
@@ -212,6 +231,10 @@ static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bo
   return p;
 }

+static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) {
+  return mi_os_prim_alloc_at(NULL, size, try_alignment, commit, allow_large, is_large, is_zero, tld_stats);
+}
+
 // Primitive aligned allocation from the OS.
 // This function guarantees the allocated memory is aligned.
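A worked reading of the new 46-bit gate in `_mi_os_get_aligned_hint` above (the arithmetic follows from the 2TiB..30TiB hint area; the quoted line is from the hunk itself):

  // 39 address bits (SV39, see the CMakeLists.txt change) -> 2^39 = 512GiB: the hint area does not even fit, so no hints
  // 48 address bits (the 64-bit default)                  -> 2^48 = 256TiB: aligned hints are used
  if (mi_os_mem_config.virtual_address_bits < 46) return NULL;  // require at least 2^46 = 64TiB of virtual address space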
@@ -235,7 +258,9 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
   }
   else {
     // if not aligned, free it, overallocate, and unmap around it
+    #if !MI_TRACK_ASAN
     _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
+    #endif
     mi_os_prim_free(p, size, commit, stats);
     if (size >= (SIZE_MAX - alignment)) return NULL;  // overflow
     const size_t over_size = size + alignment;
@@ -261,7 +286,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
     p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats);
     if (p == NULL) return NULL;
 
-    // and selectively unmap parts around the over-allocated area.
+    // and selectively unmap parts around the over-allocated area.
     void* aligned_p = mi_align_up_ptr(p, alignment);
     size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p;
     size_t mid_size = _mi_align_up(size, _mi_os_page_size());
diff --git a/src/page.c b/src/page.c
index 5671c7d4..6ae4f172 100644
--- a/src/page.c
+++ b/src/page.c
@@ -436,9 +436,6 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
 
   // no more aligned blocks in here
   mi_page_set_has_aligned(page, false);
-  #if MI_DEBUG_GUARDED
-  mi_page_set_has_guarded(page, false);
-  #endif
 
   mi_heap_t* heap = mi_page_heap(page);
@@ -467,9 +464,6 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
   mi_assert_internal(mi_page_all_free(page));
 
   mi_page_set_has_aligned(page, false);
-  #if MI_DEBUG_GUARDED
-  mi_page_set_has_guarded(page, false);
-  #endif
 
   // don't retire too often..
   // (or we end up retiring and re-allocating most of the time)
diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c
index 944c0cb4..82147de7 100644
--- a/src/prim/emscripten/prim.c
+++ b/src/prim/emscripten/prim.c
@@ -71,8 +71,8 @@ int _mi_prim_free(void* addr, size_t size) {
 extern void* emmalloc_memalign(size_t alignment, size_t size);
 
 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
-  MI_UNUSED(try_alignment); MI_UNUSED(allow_large); MI_UNUSED(commit);
+int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
+  MI_UNUSED(try_alignment); MI_UNUSED(allow_large); MI_UNUSED(commit); MI_UNUSED(hint_addr);
   *is_large = false;
   // TODO: Track the highest address ever seen; first uses of it are zeroes.
   //       That assumes no one else uses sbrk but us (they could go up,
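[editor note] The `mi_os_prim_alloc_aligned` fallback touched above works by over-allocating and trimming, which only returns the excess immediately on systems that allow partial frees (mmap-style; on Windows the span is handled differently, as the config comment notes). A sketch of the trim logic under mmap semantics; `os_alloc`/`os_free` are hypothetical stand-ins for the prim layer, not real mimalloc functions:

```c
#include <stdint.h>
#include <stddef.h>

// Hypothetical stand-ins for the OS primitives:
extern uint8_t* os_alloc(size_t size);
extern void     os_free(void* p, size_t size);

// Allocate size+alignment, locate the aligned block inside, and unmap the
// unused head and tail around it (assumes partial frees are allowed).
static void* alloc_aligned_by_overallocation(size_t size, size_t alignment) {
  if (size >= SIZE_MAX - alignment) return NULL;  // guard the addition
  const size_t over_size = size + alignment;
  uint8_t* p = os_alloc(over_size);
  if (p == NULL) return NULL;
  uint8_t* aligned_p =
    (uint8_t*)(((uintptr_t)p + alignment - 1) & ~((uintptr_t)alignment - 1));
  const size_t pre_size  = (size_t)(aligned_p - p);       // unused head
  const size_t post_size = over_size - pre_size - size;   // unused tail
  if (pre_size  > 0) os_free(p, pre_size);                // unmap before block
  if (post_size > 0) os_free(aligned_p + size, post_size);// unmap after block
  return aligned_p;  // aligned to `alignment` by construction
}
```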
diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c
index a6628fb7..6c224cb0 100644
--- a/src/prim/unix/prim.c
+++ b/src/prim/unix/prim.c
@@ -139,6 +139,12 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
   if (psize > 0) {
     config->page_size = (size_t)psize;
     config->alloc_granularity = (size_t)psize;
+    #if defined(_SC_PHYS_PAGES)
+    long pphys = sysconf(_SC_PHYS_PAGES);
+    if (pphys > 0 && (size_t)pphys < (SIZE_MAX/(size_t)psize)) {
+      config->physical_memory = (size_t)pphys * (size_t)psize;
+    }
+    #endif
   }
   config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this?
   config->has_overcommit = unix_detect_overcommit();
@@ -351,14 +357,14 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec
 }
 
 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
+int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   mi_assert_internal(commit || !allow_large);
   mi_assert_internal(try_alignment > 0);
 
   *is_zero = true;
   int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
-  *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large);
+  *addr = unix_mmap(hint_addr, size, try_alignment, protect_flags, false, allow_large, is_large);
   return (*addr != NULL ? 0 : errno);
 }
 
diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c
index 5d7a8132..e1e7de5e 100644
--- a/src/prim/wasi/prim.c
+++ b/src/prim/wasi/prim.c
@@ -119,8 +119,8 @@ static void* mi_prim_mem_grow(size_t size, size_t try_alignment) {
 }
 
 // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
-  MI_UNUSED(allow_large); MI_UNUSED(commit);
+int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
+  MI_UNUSED(allow_large); MI_UNUSED(commit); MI_UNUSED(hint_addr);
   *is_large = false;
   *is_zero = false;
   *addr = mi_prim_mem_grow(size, try_alignment);
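[editor note] The unix hunk above feeds the new `config->physical_memory` field (defaulting to `MI_DEFAULT_PHYSICAL_MEMORY`) from `sysconf`. A standalone sketch of the same query, including the guard that keeps pages × page-size from overflowing `size_t`:

```c
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>

// Query total physical memory on unix-like systems; returns 0 if unknown.
static size_t get_physical_memory(void) {
  long psize = sysconf(_SC_PAGESIZE);
  #if defined(_SC_PHYS_PAGES)
  long pphys = sysconf(_SC_PHYS_PAGES);
  if (psize > 0 && pphys > 0 && (size_t)pphys < (SIZE_MAX / (size_t)psize)) {
    return (size_t)pphys * (size_t)psize;   // cannot overflow due to the check
  }
  #endif
  return 0;
}

int main(void) {
  printf("physical memory: %zu MiB\n", get_physical_memory() / (1024*1024));
  return 0;
}
```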
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c
index 385354fc..1d3d6f41 100644
--- a/src/prim/windows/prim.c
+++ b/src/prim/windows/prim.c
@@ -118,6 +118,18 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
   GetSystemInfo(&si);
   if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; }
   if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; }
+  // get virtual address bits
+  if ((uintptr_t)si.lpMaximumApplicationAddress > 0) {
+    const size_t vbits = MI_INTPTR_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress);
+    config->virtual_address_bits = vbits;
+  }
+  // get physical memory
+  ULONGLONG memInKiB = 0;
+  if (GetPhysicallyInstalledSystemMemory(&memInKiB)) {
+    if (memInKiB > 0 && memInKiB < (SIZE_MAX / MI_KiB)) {
+      config->physical_memory = memInKiB * MI_KiB;
+    }
+  }
   // get the VirtualAlloc2 function
   HINSTANCE  hDll;
   hDll = LoadLibrary(TEXT("kernelbase.dll"));
@@ -191,7 +203,7 @@ static void* win_virtual_alloc_prim_once(void* addr, size_t size, size_t try_ali
   }
   #endif
   // on modern Windows try use VirtualAlloc2 for aligned allocation
-  if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
+  if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
     MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 };
     reqs.Alignment = try_alignment;
     MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} };
@@ -279,14 +291,14 @@ static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DW
   return p;
 }
 
-int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
+int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   mi_assert_internal(commit || !allow_large);
   mi_assert_internal(try_alignment > 0);
   *is_zero = true;
   int flags = MEM_RESERVE;
   if (commit) { flags |= MEM_COMMIT; }
-  *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large);
+  *addr = win_virtual_alloc(hint_addr, size, try_alignment, flags, false, allow_large, is_large);
   return (*addr != NULL ? 0 : (int)GetLastError());
 }
 
@@ -617,8 +629,8 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
     _mi_process_done();
   }
   else if (reason==DLL_THREAD_DETACH && !_mi_is_redirected()) {
-    _mi_thread_done(NULL);
-  }
+    _mi_thread_done(NULL);
+  }
 }
 
@@ -681,7 +693,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
   #pragma data_seg()
   #pragma data_seg(".CRT$XLY")
   PIMAGE_TLS_CALLBACK _mi_tls_callback_post[] = { &mi_win_main_detach };
-  #pragma data_seg()
+  #pragma data_seg()
 #endif
 
 #if defined(__cplusplus)
@@ -695,13 +707,13 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
   MI_UNUSED(heap);
 }
 
-#else // deprecated: statically linked, use fiber api
+#else // deprecated: statically linked, use fiber api
 
 #if defined(_MSC_VER)  // on clang/gcc use the constructor attribute (in `src/prim/prim.c`)
 // MSVC: use data section magic for static libraries
 // See <https://www.codeguru.com/cpp/misc/misc/applicationcontrol/article.php/c6945/Running-Code-Before-and-After-Main.htm>
 #define MI_PRIM_HAS_PROCESS_ATTACH  1
-
+
 static int mi_process_attach(void) {
   mi_win_main(NULL,DLL_PROCESS_ATTACH,NULL);
   atexit(&_mi_process_done);
@@ -754,9 +766,9 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
 }
 #endif
 
-// ----------------------------------------------------
+// ----------------------------------------------------
 // Communicate with the redirection module on Windows
-// ----------------------------------------------------
+// ----------------------------------------------------
 #if defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT)
 
 #define MI_PRIM_HAS_ALLOCATOR_INIT 1
diff --git a/src/stats.c b/src/stats.c
index a9364027..a2d97e94 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -119,6 +119,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
   mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1);
   mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1);
   mi_stat_counter_add(&stats->large_count, &src->large_count, 1);
+  mi_stat_counter_add(&stats->guarded_alloc_count, &src->guarded_alloc_count, 1);
 #if MI_STAT>1
   for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
     if (src->normal_bins[i].allocated > 0 || src->normal_bins[i].freed > 0) {
@@ -345,6 +346,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
   mi_stat_counter_print(&stats->commit_calls, "commits", out, arg);
   mi_stat_counter_print(&stats->reset_calls, "resets", out, arg);
   mi_stat_counter_print(&stats->purge_calls, "purges", out, arg);
+  mi_stat_counter_print(&stats->guarded_alloc_count, "guarded", out, arg);
   mi_stat_print(&stats->threads, "threads", -1, out, arg);
   mi_stat_counter_print_avg(&stats->searches, "searches", out, arg);
   _mi_fprintf(out, arg, "%10s: %5zu\n", "numa nodes", _mi_os_numa_node_count());
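[editor note] In the Windows hunk above, `config->virtual_address_bits` is derived as `MI_INTPTR_BITS - mi_clz(lpMaximumApplicationAddress)`, i.e. the bit width of the highest user-mode address. A portable sketch of that computation, using a loop in place of a count-leading-zeros intrinsic; the example addresses are typical values, not guarantees:

```c
#include <stdint.h>
#include <assert.h>

// Bit width of the highest usable address == usable virtual address bits.
static unsigned va_bits_from_max_address(uint64_t max_app_addr) {
  unsigned bits = 0;
  while (max_app_addr != 0) { bits++; max_app_addr >>= 1; }
  return bits;  // equals (word size - clz(x)) for nonzero x
}

int main(void) {
  assert(va_bits_from_max_address(0x00007FFFFFFEFFFFULL) == 47);  // typical 64-bit Windows
  assert(va_bits_from_max_address(0x7FFEFFFFULL) == 31);          // classic 2GiB user space
  return 0;
}
```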
diff --git a/test/main-override-static.c b/test/main-override-static.c
index 07af1090..b2b6ee20 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -20,12 +20,9 @@ static void test_reserved(void);
 static void negative_stat(void);
 static void alloc_huge(void);
 static void test_heap_walk(void);
-<<<<<<< HEAD
 static void test_heap_arena(void);
 static void test_align(void);
-=======
 static void test_canary_leak(void);
->>>>>>> dev
 // static void test_large_pages(void);
 
 int main() {
diff --git a/test/main-override.cpp b/test/main-override.cpp
index 5e8b6f82..fc9c3f22 100644
--- a/test/main-override.cpp
+++ b/test/main-override.cpp
@@ -11,7 +11,7 @@
 #include
 #include
-#include
+//#include
 #include
 
 #ifdef _WIN32
@@ -35,21 +35,23 @@ static void test_mt_shutdown();
 static void large_alloc(void);          // issue #363
 static void fail_aslr();                // issue #372
 static void tsan_numa_test();           // issue #414
-static void strdup_test();              // issue #445
+static void strdup_test();              // issue #445
 static void bench_alloc_large(void);    // issue #xxx
 //static void test_large_migrate(void); // issue #691
 static void heap_thread_free_huge();
 static void test_std_string();          // issue #697
 static void test_thread_local();        // issue #944
-
+// static void test_mixed0();           // issue #942
+static void test_mixed1();              // issue #942
 static void test_stl_allocators();
 
 int main() {
   // mi_stats_reset();  // ignore earlier allocations
+  test_mixed1();
   //test_std_string();
-  test_thread_local();
+  //test_thread_local();
   // heap_thread_free_huge();
   /*
   heap_thread_free_huge();
@@ -65,10 +67,9 @@ int main() {
   // test_stl_allocators();
   // test_mt_shutdown();
   // test_large_migrate();
-
+
   //fail_aslr();
-  // bench_alloc_large();
-  // mi_stats_print(NULL);
+  mi_stats_print(NULL);
   return 0;
 }
 
@@ -187,6 +188,53 @@ static void test_stl_allocators() {
 #endif
 }
 
+#if 0
+#include <memory>
+#include <thread>
+#include <vector>
+#include <atomic>
+#include <chrono>
+#include <iostream>
+
+static void test_mixed0() {
+  std::vector<std::unique_ptr<std::size_t>> numbers(1024 * 1024 * 100);
+  std::vector<std::thread> threads(1);
+
+  std::atomic<std::size_t> index{};
+
+  auto start = std::chrono::system_clock::now();
+
+  for (auto& thread : threads) {
+    thread = std::thread{[&index, &numbers]() {
+      while (true) {
+        auto i = index.fetch_add(1, std::memory_order_relaxed);
+        if (i >= numbers.size()) return;
+
+        numbers[i] = std::make_unique<std::size_t>(i);
+      }
+    }};
+  }
+
+  for (auto& thread : threads) thread.join();
+
+  auto end = std::chrono::system_clock::now();
+
+  auto duration =
+      std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+  std::cout << "Running on " << threads.size() << " threads took " << duration
+            << std::endl;
+}
+#endif
+
+void asd() {
+  void* p = malloc(128);
+  free(p);
+}
+
+static void test_mixed1() {
+  std::thread thread(asd);
+  thread.join();
+}
+
 #if 0
 // issue #691
 static char* cptr;
diff --git a/test/test-api-fill.c b/test/test-api-fill.c
index 3baee83d..eebbd394 100644
--- a/test/test-api-fill.c
+++ b/test/test-api-fill.c
@@ -271,7 +271,7 @@ int main(void) {
     mi_free(p);
   };
 
-  #if !(MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_DEBUG_GUARDED)
+  #if !(MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_GUARDED)
   CHECK_BODY("fill-freed-small") {
     size_t malloc_size = MI_SMALL_SIZE_MAX / 2;
     uint8_t* p = (uint8_t*)mi_malloc(malloc_size);
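[editor note] The test-api-fill exclusion above exists because guarded allocations are placed against a guard page rather than fill-patterned like regular debug blocks. For reference, a sketch of driving the renamed options at runtime; this assumes a library built with `-DMI_GUARDED=ON`, and the size thresholds are illustrative:

```c
#include <mimalloc.h>

int main(void) {
  // sample objects between 16 bytes and 1 KiB for guard pages
  mi_option_set(mi_option_guarded_min, 16);
  mi_option_set(mi_option_guarded_max, 1024);
  void* p = mi_malloc(512);  // eligible: may get a guard page behind it
  mi_free(p);                // an overflow past the block end would fault
  mi_stats_print(NULL);      // includes the new "guarded" counter
  return 0;
}
```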
diff --git a/test/test-stress.c b/test/test-stress.c
index caf18798..0e8b45a2 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -22,19 +22,22 @@ terms of the MIT license.
 #include
 #include
 
+// #define MI_GUARDED
+// #define USE_STD_MALLOC
+
 // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
 //
 // argument defaults
 #if defined(MI_TSAN)            // with thread-sanitizer reduce the threads to test within the azure pipeline limits
-static int THREADS = 8;
+static int THREADS = 8;
 static int SCALE   = 25;
 static int ITER    = 400;
 #elif defined(MI_UBSAN)         // with undefined behavious sanitizer reduce parameters to stay within the azure pipeline limits
-static int THREADS = 8;
+static int THREADS = 8;
 static int SCALE   = 25;
 static int ITER    = 20;
-#elif defined(MI_DEBUG_GUARDED) // with debug guard pages reduce parameters to stay within the azure pipeline limits
-static int THREADS = 8;
+#elif defined(xMI_GUARDED)      // with debug guard pages reduce parameters to stay within the azure pipeline limits
+static int THREADS = 8;
 static int SCALE   = 10;
 static int ITER    = 10;
 #else
@@ -47,16 +50,11 @@ static int ITER    = 50;  // N full iterations destructing and re-creating a
 
 #define STRESS   // undefine for leak test
 
-#ifndef NDEBUG
-#define HEAP_WALK   // walk the heap objects?
-#endif
-
 static bool   allow_large_objects = true;   // allow very large objects? (set to `true` if SCALE>100)
 static size_t use_one_size = 0;             // use single object size of `N * sizeof(uintptr_t)`?
 
 static bool   main_participates = false;    // main thread participates as a worker too
 
-// #define USE_STD_MALLOC
 #ifdef USE_STD_MALLOC
 #define custom_calloc(n,s)    calloc(n,s)
 #define custom_realloc(p,s)   realloc(p,s)
@@ -66,6 +64,9 @@ static bool   main_participates = false;    // main thread participates as a
 #define custom_calloc(n,s)    mi_calloc(n,s)
 #define custom_realloc(p,s)   mi_realloc(p,s)
 #define custom_free(p)        mi_free(p)
+#ifndef NDEBUG
+#define HEAP_WALK   // walk the heap objects?
+#endif
 #endif
 
 // transfer pointer between threads
@@ -220,7 +221,7 @@ static void test_stress(void) {
   uintptr_t r = rand();
   for (int n = 0; n < ITER; n++) {
     run_os_threads(THREADS, &stress);
-    #ifndef NDEBUG
+    #if !defined(NDEBUG) && !defined(USE_STD_MALLOC)
     // switch between arena and OS allocation for testing
     mi_option_set_enabled(mi_option_disallow_arena_alloc, (n%2)==1);
     #endif
@@ -270,7 +271,7 @@ int main(int argc, char** argv) {
   #ifdef HEAP_WALK
   mi_option_enable(mi_option_visit_abandoned);
   #endif
-  #ifndef NDEBUG
+  #if !defined(NDEBUG) && !defined(USE_STD_MALLOC)
   mi_option_set(mi_option_arena_reserve, 32 * 1024 /* in kib = 32MiB */);
   #endif
   #ifndef USE_STD_MALLOC