Merge branch 'dev' into dev-trace

2025-07-13 06:34:58 +03:00 · 2022-04-14 16:59:36 -07:00 · 2022-04-14 16:59:36 -07:00 · b2fe83fa2c
commit b2fe83fa2c
parent 1270eec6c0 8d6a9df752
10 changed files with 30 additions and 17 deletions
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@ -18,7 +18,7 @@ jobs:
  displayName: Windows
  pool:
    vmImage:
-      windows-2019
+      windows-2022
  strategy:
    matrix:
      Debug:
--- a/doc/mimalloc-doc.h
+++ b/doc/mimalloc-doc.h
@ -56,7 +56,7 @@ Notable aspects of the design include:
 - __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions.
  A heap can be destroyed at once instead of deallocating each object separately.
 - __bounded__: it does not suffer from _blowup_ \[1\], has bounded worst-case allocation
-  times (_wcat_), bounded space overhead (~0.2% meta-data, with at most 12.5% waste in allocation sizes),
+  times (_wcat_), bounded space overhead (~0.2% meta-data, with low internal fragmentation),
  and has no internal points of contention using only atomic operations.
 - __fast__: In our benchmarks (see [below](#performance)),
  _mimalloc_ outperforms all other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc),
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@ -20,6 +20,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_CACHE_LINE          64
 #if defined(_MSC_VER)
 #pragma warning(disable:4127)   // suppress constant conditional warning (due to MI_SECURE paths)
+#pragma warning(disable:26812)  // unscoped enum warning
 #define mi_decl_noinline        __declspec(noinline)
 #define mi_decl_thread          __declspec(thread)
 #define mi_decl_cache_align     __declspec(align(MI_CACHE_LINE))
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@ -324,6 +324,7 @@ typedef enum mi_option_e {
  mi_option_os_tag,
  mi_option_max_errors,
  mi_option_max_warnings,
+  mi_option_max_segment_reclaim,
  _mi_option_last
 } mi_option_t;

@ -335,6 +336,7 @@ mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable);
 mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable);

 mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option);
+mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max);
 mi_decl_export void mi_option_set(mi_option_t option, long value);
 mi_decl_export void mi_option_set_default(mi_option_t option, long value);

--- a/readme.md
+++ b/readme.md
@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac
 Initially developed by Daan Leijen for the run-time systems of the
 [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.

-Latest release tag: `v2.0.5` (alpha, 2022-02-14).  
-Latest stable  tag: `v1.7.5` (2022-02-14).
+Latest release tag: `v2.0.6` (2022-04-14).  
+Latest stable  tag: `v1.7.6` (2022-04-14).

 mimalloc is a drop-in replacement for `malloc` and can be used in other programs
 without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
@ -52,7 +52,7 @@ It also has an easy way to override the default allocator in [Windows](#override
 - __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions.
  A heap can be destroyed at once instead of deallocating each object separately.  
 - __bounded__: it does not suffer from _blowup_ \[1\], has bounded worst-case allocation
-  times (_wcat_), bounded space overhead (~0.2% meta-data, with at most 12.5% waste in allocation sizes),
+  times (_wcat_), bounded space overhead (~0.2% meta-data, with low internal fragmentation),
  and has no internal points of contention using only atomic operations.
 - __fast__: In our benchmarks (see [below](#performance)),
  _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc),
@ -67,16 +67,21 @@ Enjoy!

 ### Branches

-* `master`: latest stable release.
-* `dev`: development branch for mimalloc v1.
-* `dev-slice`: development branch for mimalloc v2 with a new algorithm for managing internal mimalloc pages.
+* `master`: latest stable release (based on `dev-slice`).
+* `dev`: development branch for mimalloc v1. Use this branch for submitting PR's.
+* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev`.

 ### Releases

-Note: the `v2.x` beta has a new algorithm for managing internal mimalloc pages that tends to use reduce memory usage
+Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to reduce memory usage
  and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance
  (see [below](#performance)); please report if you observe any significant performance regression.

+* 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation
+  even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix
+  warnings under Clang 14, improve performance if many OS threads are created and destroyed, fix statistics for large object
+  allocations, using MIMALLOC_VERBOSE=1 has no maximum on the number of error messages, various small fixes.
+
 * 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on
  Windows 11, fix compilation with musl, potentially reduced
  committed memory, add `bin/minject` for Windows, 
@ -301,7 +306,7 @@ or via environment variables:

 Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write
 for all pages in the original process including the huge OS pages. When any memory is now written in that area, the
-OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the memory usage to grow in big increments.
+OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the memory usage to grow in large increments.

 [linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5
 [windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017
--- a/src/init.c
+++ b/src/init.c
@ -555,7 +555,7 @@ void mi_process_init(void) mi_attr_noexcept {
  mi_stats_reset();  // only call stat reset *after* thread init (or the heap tld == NULL)

  if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
-    size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
+    size_t pages = mi_option_get_clamp(mi_option_reserve_huge_os_pages, 0, 128*1024);
    long reserve_at = mi_option_get(mi_option_reserve_huge_os_pages_at);
    if (reserve_at != -1) {
      mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500);
--- a/src/options.c
+++ b/src/options.c
@ -92,8 +92,8 @@ static mi_option_desc_t options[_mi_option_last] =
  { 0,   UNINIT, MI_OPTION(limit_os_alloc) },    // 1 = do not use OS memory for allocation (but only reserved arenas)
  { 100, UNINIT, MI_OPTION(os_tag) },            // only apple specific for now but might serve more or less related purpose
  { 16,  UNINIT, MI_OPTION(max_errors) },        // maximum errors that are output
-  { 16,  UNINIT, MI_OPTION(max_warnings) }       // maximum warnings that are output
-
+  { 16,  UNINIT, MI_OPTION(max_warnings) },      // maximum warnings that are output
+  { 8,   UNINIT, MI_OPTION(max_segment_reclaim)} // max. number of segment reclaims from the abandoned segments per try.
 };

 static void mi_option_init(mi_option_desc_t* desc);
@ -125,6 +125,11 @@ mi_decl_nodiscard long mi_option_get(mi_option_t option) {
  return desc->value;
 }

+mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long max) {  // Read `option` via mi_option_get() and limit the result to [min, max] (meaningful when min <= max).
+  long x = mi_option_get(option);
+  return (x < min ? min : (x > max ? max : x));  // below `min` yields `min`; above `max` yields `max`; otherwise the value itself
+}
+
 void mi_option_set(mi_option_t option, long value) {
  mi_assert(option >= 0 && option < _mi_option_last);
  if (option < 0 || option >= _mi_option_last) return;
--- a/src/os.c
+++ b/src/os.c
@ -375,7 +375,7 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats
    // the start of the region.
    MEMORY_BASIC_INFORMATION info = { 0, 0 };
    VirtualQuery(addr, &info, sizeof(info));
-    if (info.AllocationBase < addr) {
+    if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < MI_SEGMENT_SIZE) {
      errcode = 0;
      err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0);
      if (err) { errcode = GetLastError(); }
@ -411,7 +411,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
    if (hint != NULL) {
      void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE);
      if (p != NULL) return p;
-      _mi_warning_message("unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags);
+      _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags);
      // fall through on error
    }
  } 
--- a/src/region.c
+++ b/src/region.c
@ -122,7 +122,7 @@ static size_t mi_good_commit_size(size_t size) {
 */

 // Return if a pointer points into a region reserved by us.
-bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
+mi_decl_nodiscard bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
  if (p==NULL) return false;
  size_t count = mi_atomic_load_relaxed(&regions_count);
  for (size_t i = 0; i < count; i++) {
--- a/src/segment.c
+++ b/src/segment.c
@ -1112,7 +1112,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size,
 {
  *reclaimed = false;
  mi_segment_t* segment;
-  int max_tries = 8;     // limit the work to bound allocation times
+  long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024);     // limit the work to bound allocation times  
  while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
    segment->abandoned_visits++;
    bool all_pages_free;