diff --git a/CMakeLists.txt b/CMakeLists.txt
index 56af1c94..7018b73c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,6 +22,7 @@ option(MI_BUILD_TESTS "Build test executables" ON)
 option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
 option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
 option(MI_DEBUG_TRACE "Store allocation stack trace in each heap block to debug heap block overflows or corruption" OFF)
+option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF)

 # deprecated options
 option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF)
@@ -121,6 +122,11 @@ if(MI_CHECK_FULL)
   set(MI_DEBUG_FULL "ON")
 endif()

+if (MI_SKIP_COLLECT_ON_EXIT)
+  message(STATUS "Skip collecting memory on program exit (MI_SKIP_COLLECT_ON_EXIT=ON)")
+  list(APPEND mi_defines MI_SKIP_COLLECT_ON_EXIT=1)
+endif()
+
 if(MI_DEBUG_FULL)
   message(STATUS "Set debug level to full internal invariant checking (MI_DEBUG_FULL=ON)")
   list(APPEND mi_defines MI_DEBUG=3)   # full invariant checking
@@ -252,7 +258,7 @@ endif()
 # -----------------------------------------------------------------------------

 # dynamic/shared library and symlinks always go to /usr/local/lib equivalent
-set(mi_install_libdir "${CMAKE_INSTALL_LIBDIR}")
+set(mi_install_libdir "${CMAKE_INSTALL_LIBDIR}")

 # static libraries and object files, includes, and cmake config files
 # are either installed at top level, or use versioned directories for side-by-side installation (default)
@@ -331,6 +337,7 @@ if(MI_BUILD_SHARED)
     add_custom_command(TARGET mimalloc POST_BUILD
       COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" $<TARGET_FILE_DIR:mimalloc>
       COMMENT "Copy mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll to output directory")
+    install(FILES "$<TARGET_FILE_DIR:mimalloc>/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" DESTINATION ${mi_install_libdir})
   endif()
   install(TARGETS mimalloc EXPORT mimalloc DESTINATION ${mi_install_libdir} LIBRARY)
@@ -381,7 +388,7 @@ if (MI_BUILD_OBJECT)
   )

   # the following seems to lead to cmake warnings/errors on some systems, disable for now :-(
-  # install(TARGETS mimalloc-obj EXPORT mimalloc DESTINATION ${mi_install_libdir})
+  # install(TARGETS mimalloc-obj EXPORT mimalloc DESTINATION ${mi_install_objdir})

   # the FILES expression can also be: $<TARGET_OBJECTS:mimalloc-obj>
   # but that fails cmake versions less than 3.10 so we leave it as is for now
diff --git a/bin/minject.exe b/bin/minject.exe
index e576c71f..e5f87800 100644
Binary files a/bin/minject.exe and b/bin/minject.exe differ
diff --git a/bin/minject32.exe b/bin/minject32.exe
index 1eb8a75d..d0181028 100644
Binary files a/bin/minject32.exe and b/bin/minject32.exe differ
diff --git a/doc/ds-logo.jpg b/doc/ds-logo.jpg
index 853a7279..c9abb1a9 100644
Binary files a/doc/ds-logo.jpg and b/doc/ds-logo.jpg differ
diff --git a/doc/ds-logo.png b/doc/ds-logo.png
new file mode 100644
index 00000000..93b84e44
Binary files /dev/null and b/doc/ds-logo.png differ
diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h
index 4cf8c2c3..4de6085c 100644
--- a/doc/mimalloc-doc.h
+++ b/doc/mimalloc-doc.h
@@ -802,20 +802,32 @@ typedef enum mi_option_e {
   mi_option_show_errors,  ///< Print error messages to `stderr`.
   mi_option_show_stats,   ///< Print statistics to `stderr` when the program is done.
   mi_option_verbose,      ///< Print verbose messages to `stderr`.
+
+  // the following options are experimental
   mi_option_eager_commit, ///< Eagerly commit segments (4MiB) (enabled by default).
-  mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
   mi_option_large_os_pages, ///< Use large OS pages (2MiB in size) if possible
   mi_option_reserve_huge_os_pages, ///< The number of huge OS pages (1GiB in size) to reserve at the start of the program.
   mi_option_reserve_huge_os_pages_at, ///< Reserve huge OS pages at node N.
-  mi_option_segment_cache, ///< The number of segments per thread to keep cached.
+  mi_option_reserve_os_memory, ///< Reserve specified amount of OS memory at startup, e.g. "1g" or "512m".
+  mi_option_segment_cache, ///< The number of segments per thread to keep cached (0).
   mi_option_page_reset, ///< Reset page memory after \a mi_option_reset_delay milliseconds when it becomes free.
+  mi_option_abandoned_page_reset, ///< Reset free page memory when a thread terminates.
+  mi_option_use_numa_nodes, ///< Pretend there are at most N NUMA nodes; use 0 to use the actual detected NUMA nodes at runtime.
+  mi_option_eager_commit_delay, ///< The first N segments per thread are not eagerly committed (=1).
+  mi_option_os_tag, ///< OS tag to assign to mimalloc'd memory
+  mi_option_limit_os_alloc, ///< If set to 1, do not use OS memory for allocation (but only pre-reserved arenas)
+
+  // v1.x specific options
+  mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
   mi_option_segment_reset, ///< Experimental
   mi_option_reset_delay, ///< Delay in milli-seconds before resetting a page (100ms by default)
-  mi_option_use_numa_nodes, ///< Pretend there are at most N NUMA nodes
   mi_option_reset_decommits, ///< Experimental
-  mi_option_eager_commit_delay, ///< Experimental
-  mi_option_os_tag, ///< OS tag to assign to mimalloc'd memory
+
+  // v2.x specific options
+  mi_option_allow_decommit, ///< Enable decommitting memory (=on)
+  mi_option_decommit_delay, ///< Decommit page memory after N milli-seconds delay (25ms).
+  mi_option_segment_decommit_delay, ///< Decommit large segment memory after N milli-seconds delay (500ms).
+
   _mi_option_last
} mi_option_t;
@@ -1068,7 +1080,7 @@ or via environment variables.
- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages when not in use to signal to the OS
   that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server)
   programs. By setting it to `0` no such page resets will be done which can improve performance for programs that are not long
-   running. As an alternative, the `MIMALLOC_RESET_DELAY=` can be set higher (100ms by default) to make the page
+   running. As an alternative, the `MIMALLOC_DECOMMIT_DELAY=` can be set higher (25ms by default) to make the page
   reset occur less frequently instead of turning it off completely.
- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly improve performance.
   Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
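The hunks above document the renamed environment options (`MIMALLOC_DECOMMIT_DELAY` taking over from `MIMALLOC_RESET_DELAY` in v2.x). The same options can also be driven from code through the public option API in `mimalloc.h`; a minimal sketch (not part of this patch):

```c
// cc demo_options.c -lmimalloc
#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  // set options early, before they are first consulted by the allocator
  mi_option_set(mi_option_decommit_delay, 100);  // raise the 25ms default
  mi_option_disable(mi_option_page_reset);       // same effect as MIMALLOC_PAGE_RESET=0

  void* p = mi_malloc(1024);
  printf("decommit delay: %ld ms, page reset: %s\n",
         mi_option_get(mi_option_decommit_delay),
         mi_option_is_enabled(mi_option_page_reset) ? "on" : "off");
  mi_free(p);
  return 0;
}
```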
diff --git a/doc/spades-logo.png b/doc/spades-logo.png
new file mode 100644
index 00000000..d8c73fef
Binary files /dev/null and b/doc/spades-logo.png differ
diff --git a/doc/unreal-logo.svg b/doc/unreal-logo.svg
new file mode 100644
index 00000000..5d5192a2
--- /dev/null
+++ b/doc/unreal-logo.svg
@@ -0,0 +1,43 @@
+ (Unreal Engine logo: 43 lines of SVG markup, not reproduced here)
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 90afb72d..f793fcb2 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -101,6 +101,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed,
// "segment-cache.c"
void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
+void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld);
void _mi_segment_map_allocated_at(const mi_segment_t* segment);
void _mi_segment_map_freed_at(const mi_segment_t* segment);
@@ -115,6 +116,7 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
void _mi_abandoned_await_readers(void);
+void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld);
@@ -818,6 +820,7 @@ static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
  || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \
  || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
  || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
+  || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
  )

static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept {
diff --git a/include/mimalloc.h b/include/mimalloc.h
index 98689d28..06597e9a 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -298,7 +298,7 @@ mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size

// ------------------------------------------------------
-// Options, all `false` by default
+// Options
// ------------------------------------------------------

typedef enum mi_option_e {
@@ -306,27 +306,28 @@ typedef enum mi_option_e {
  mi_option_show_errors,
  mi_option_show_stats,
  mi_option_verbose,
-  // the following options are experimental
+  // some of the following options are experimental
+  // (deprecated options are kept for binary backward compatibility with v1.x versions)
  mi_option_eager_commit,
-  mi_option_eager_region_commit,
-  mi_option_reset_decommits,
-  mi_option_large_os_pages, // implies eager commit
-  mi_option_reserve_huge_os_pages,
-  mi_option_reserve_huge_os_pages_at,
-  mi_option_reserve_os_memory,
+  mi_option_deprecated_eager_region_commit,
+  mi_option_deprecated_reset_decommits,
+  mi_option_large_os_pages,           // use large (2MiB) OS pages, implies eager commit
+  mi_option_reserve_huge_os_pages,    // reserve N huge OS pages (1GiB) at startup
+  mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node
+  mi_option_reserve_os_memory,        // reserve specified amount of OS memory at startup
  mi_option_segment_cache,
  mi_option_page_reset,
-  mi_option_abandoned_page_reset,
-  mi_option_segment_reset,
+  mi_option_abandoned_page_decommit,
+  mi_option_deprecated_segment_reset,
  mi_option_eager_commit_delay,
-  mi_option_allow_decommit,
-  mi_option_reset_delay,
-  mi_option_segment_decommit_delay,
-  mi_option_use_numa_nodes,
-  mi_option_limit_os_alloc,
+  mi_option_decommit_delay,
+  mi_option_use_numa_nodes,  // 0 = use available numa nodes, otherwise use at most N nodes.
+  mi_option_limit_os_alloc,  // 1 = do not use OS memory for allocation (but only reserved arenas)
  mi_option_os_tag,
  mi_option_max_errors,
  mi_option_max_warnings,
+  mi_option_allow_decommit,
+  mi_option_segment_decommit_delay,
  _mi_option_last
} mi_option_t;
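The comment about binary backward compatibility is the key constraint in this hunk: options that existed in v1.x keep their enum positions (hence the `deprecated_` placeholders, and `mi_option_decommit_delay` occupying the old `mi_option_reset_delay` slot), while the v2.x-only options are appended after `mi_option_max_warnings`. A compile-time sketch of how one could pin this down (not part of this patch; the expected indices are simply counted off the new enum above):

```c
#include <mimalloc.h>

// these hold for the enum as laid out in this patch; a failure here would
// mean an option index shifted and v1.x binaries would misread options
_Static_assert(mi_option_eager_commit_delay == 14, "v1.x option index must stay stable");
_Static_assert(mi_option_decommit_delay == 15, "takes over the old mi_option_reset_delay slot");
_Static_assert(mi_option_max_warnings == 20, "pre-existing options keep their positions");
```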
diff --git a/readme.md b/readme.md
index dfef82d5..635d983e 100644
--- a/readme.md
+++ b/readme.md
@@ -91,34 +91,7 @@ Note: the `v2.x` beta has a new algorithm for managing internal mimalloc pages t
* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics,
  improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes.

-### Older Releases
-
-* 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved
-  handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call.
-* 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations,
-  support for IllumOS and Haiku, NUMA support for Vista/XP, improved NUMA detection for AMD Ryzen, ubsan support.
-* 2020-05-05, `v1.6.3`: stable release 1.6: improved behavior in out-of-memory situations, improved malloc zones on macOS,
-  build PIC static libraries by default, add option to abort on out-of-memory, line buffered statistics.
-* 2020-04-20, `v1.6.2`: stable release 1.6: fix compilation on Android, MingW, Raspberry, and Conda,
-  stability fix for Windows 7, fix multiple mimalloc instances in one executable, fix `strnlen` overload,
-  fix aligned debug padding.
-* 2020-02-17, `v1.6.1`: stable release 1.6: minor updates (build with clang-cl, fix alignment issue for small objects).
-* 2020-02-09, `v1.6.0`: stable release 1.6: fixed potential memory leak, improved overriding
-  and thread local support on FreeBSD, NetBSD, DragonFly, and macOSX. New byte-precise
-  heap block overflow detection in debug mode (besides the double-free detection and free-list
-  corruption detection). Add `nodiscard` attribute to most allocation functions.
-  Enable `MIMALLOC_PAGE_RESET` by default. New reclamation strategy for abandoned heap pages
-  for better memory footprint.
-* 2020-02-09, `v1.5.0`: stable release 1.5: improved free performance, small bug fixes.
-* 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset,
-more eager concurrent free, addition of STL allocator, fixed potential memory leak.
-* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger
-free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode.
-* 2019-12-22, `v1.2.2`: stable release 1.2: minor updates.
-* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows.
-* 2019-10-07, `v1.1.0`: stable release 1.1.
-* 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support.
-* 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements.
+* [Older release notes](#older-release-notes)

Special thanks to:
@@ -130,7 +103,9 @@ Special thanks to:
  at large scale services, leading to many improvements in the mimalloc algorithms for large workloads.
* Jason Gibson (@jasongibson) for exhaustive testing on large scale workloads and server environments,
  and finding complex bugs in (early versions of) `mimalloc`.
-* Manuel Pöter (@mpoeter) and Sam Gross (@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation.
+* Manuel Pöter (@mpoeter) and Sam Gross (@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation. Sam also created the [no GIL](https://github.com/colesbury/nogil) Python fork which
+  uses mimalloc internally.
+

[genMC]: https://plv.mpi-sws.org/genmc/

@@ -138,9 +113,12 @@ Special thanks to:

mimalloc is used in various large scale low-latency services and programs, for example:

-
-
-
+
+
+
+
+
+

# Building
@@ -647,6 +625,7 @@ see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks
-->

+
# References

- \[1] Emery D. Berger, Kathryn S. McKinley, Robert D. Blumofe, and Paul R. Wilson.
@@ -684,7 +663,6 @@ see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks
  In Proceedings of the 2019 ACM SIGPLAN International Symposium on Memory Management, 122–135. ACM. 2019.
-->
-
# Contributing

This project welcomes contributions and suggestions. Most contributions require you to agree to a
@@ -694,3 +672,34 @@ the rights to use your contribution. For details, visit https://cla.microsoft.co
When you submit a pull request, a CLA-bot will automatically determine whether you need to provide
a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions
provided by the bot. You will only need to do this once across all repos using our CLA.
+
+
+# Older Release Notes
+
+* 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved
+  handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call.
+* 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations,
+  support for IllumOS and Haiku, NUMA support for Vista/XP, improved NUMA detection for AMD Ryzen, ubsan support.
+* 2020-05-05, `v1.6.3`: stable release 1.6: improved behavior in out-of-memory situations, improved malloc zones on macOS,
+  build PIC static libraries by default, add option to abort on out-of-memory, line buffered statistics.
+* 2020-04-20, `v1.6.2`: stable release 1.6: fix compilation on Android, MingW, Raspberry, and Conda,
+  stability fix for Windows 7, fix multiple mimalloc instances in one executable, fix `strnlen` overload,
+  fix aligned debug padding.
+* 2020-02-17, `v1.6.1`: stable release 1.6: minor updates (build with clang-cl, fix alignment issue for small objects).
+* 2020-02-09, `v1.6.0`: stable release 1.6: fixed potential memory leak, improved overriding
+  and thread local support on FreeBSD, NetBSD, DragonFly, and macOSX. New byte-precise
+  heap block overflow detection in debug mode (besides the double-free detection and free-list
+  corruption detection). Add `nodiscard` attribute to most allocation functions.
+  Enable `MIMALLOC_PAGE_RESET` by default. New reclamation strategy for abandoned heap pages
+  for better memory footprint.
+* 2020-02-09, `v1.5.0`: stable release 1.5: improved free performance, small bug fixes.
+* 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset,
+more eager concurrent free, addition of STL allocator, fixed potential memory leak.
+* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger
+free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode.
+* 2019-12-22, `v1.2.2`: stable release 1.2: minor updates.
+* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows.
+* 2019-10-07, `v1.1.0`: stable release 1.1.
+* 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support.
+* 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements.
+
diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c
index edd93b37..a88186bc 100644
--- a/src/alloc-override-osx.c
+++ b/src/alloc-override-osx.c
@@ -183,6 +183,10 @@ static boolean_t intro_zone_locked(malloc_zone_t* zone) {
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#endif

+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wc99-extensions"
+#endif
+
static malloc_introspection_t mi_introspect = {
  .enumerator = &intro_enumerator,
  .good_size = &intro_good_size,
@@ -213,7 +217,7 @@ static malloc_zone_t mi_malloc_zone = {
  .batch_free = &zone_batch_free,
  .introspect = &mi_introspect,
#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6)
-  #if defined(MAC_OS_X_VERSION_10_7) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)
+  #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14)
  .version = 10,
  #else
  .version = 9,
@@ -222,7 +226,7 @@ static malloc_zone_t mi_malloc_zone = {
  .memalign = &zone_memalign,
  .free_definite_size = &zone_free_definite_size,
  .pressure_relief = &zone_pressure_relief,
-  #if defined(MAC_OS_X_VERSION_10_7) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)
+  #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14)
  .claimed_address = &zone_claimed_address,
  #endif
#else
diff --git a/src/alloc.c b/src/alloc.c
index c6090149..39ee09a2 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -553,12 +553,12 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms

// Free a block
void mi_free(void* p) mi_attr_noexcept
{
-  const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
+  mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
  if (mi_unlikely(segment == NULL)) return;

  mi_threadid_t tid = _mi_thread_id();
  mi_page_t* const page = _mi_segment_page_of(segment, p);
-
+
  if (mi_likely(tid == mi_atomic_load_relaxed(&segment->thread_id) && page->flags.full_aligned == 0)) {
    // the thread id matches and it is not a full page, nor has aligned blocks
    // local, and not full or aligned
    mi_block_t* block = (mi_block_t*)(p);
diff --git a/src/heap.c b/src/heap.c
index d7975b0b..4fdfb0b9 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -115,17 +115,20 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq
static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
{
  if (heap==NULL || !mi_heap_is_initialized(heap)) return;
-  _mi_deferred_free(heap, collect >= MI_FORCE);
+
+  const bool force = collect >= MI_FORCE;
+  _mi_deferred_free(heap, force);

  // note: never reclaim on collect but leave it to threads that need storage to reclaim
-  if (
-  #ifdef NDEBUG
+  const bool force_main =
+    #ifdef NDEBUG
      collect == MI_FORCE
-  #else
+    #else
      collect >= MI_FORCE
-  #endif
-    && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim)
-  {
+    #endif
+    && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim;
+
+  if (force_main) {
    // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
    // if all memory is freed by now, all segments should be freed.
    _mi_abandoned_reclaim_all(heap, &heap->tld->segments);
@@ -141,19 +144,27 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
  _mi_heap_delayed_free(heap);

  // collect retired pages
-  _mi_heap_collect_retired(heap, collect >= MI_FORCE);
+  _mi_heap_collect_retired(heap, force);

  // collect all pages owned by this thread
  mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
  mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );

-  // collect segment caches
-  if (collect >= MI_FORCE) {
+  // collect abandoned segments (in particular, decommit expired parts of segments in the abandoned segment list)
+  // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment
+  _mi_abandoned_collect(heap, collect == MI_FORCE /* force? */, &heap->tld->segments);
+
+  // collect segment local caches
+  if (force) {
    _mi_segment_thread_collect(&heap->tld->segments);
  }
+  // decommit in global segment caches
+  // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment
+  _mi_segment_cache_collect( collect == MI_FORCE, &heap->tld->os);
+
  // collect regions on program-exit (or shared library unload)
-  if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
+  if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
    //_mi_mem_collect(&heap->tld->os);
  }
}
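For context, `mi_heap_collect_ex` is the engine behind the public `mi_collect()`/`mi_heap_collect()` entry points, so the new decommit behavior above is reachable from user code. A small usage sketch (not part of this patch):

```c
#include <mimalloc.h>

// return freed memory to the OS at a quiet moment in a long-running program
void after_big_workload(void) {
  // force=false: cheap housekeeping (deferred frees, retired pages, expired decommits)
  mi_collect(false);
  // force=true: also reclaims/decommits segment caches; expensive, use sparingly
  mi_collect(true);
}
```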
diff --git a/src/init.c b/src/init.c
index 9e015aa8..5396f20e 100644
--- a/src/init.c
+++ b/src/init.c
@@ -466,7 +466,9 @@ static void mi_process_load(void) {
  MI_UNUSED(dummy);
#endif
  os_preloading = false;
-  atexit(&mi_process_done);
+  #if !(defined(_WIN32) && defined(MI_SHARED_LIB))  // use Dll process detach (see below) instead of atexit (issue #521)
+  atexit(&mi_process_done);
+  #endif
  _mi_options_init();
  mi_process_init();
  //mi_stats_reset();
@@ -553,11 +555,13 @@ static void mi_process_done(void) {
  FlsFree(mi_fls_key);  // call thread-done on all threads (except the main thread) to prevent dangling callback pointer if statically linked with a DLL; Issue #208
#endif

-  #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB)
-  // free all memory if possible on process exit. This is not needed for a stand-alone process
-  // but should be done if mimalloc is statically linked into another shared library which
-  // is repeatedly loaded/unloaded, see issue #281.
-  mi_collect(true /* force */ );
+  #ifndef MI_SKIP_COLLECT_ON_EXIT
+  #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB)
+  // free all memory if possible on process exit. This is not needed for a stand-alone process
+  // but should be done if mimalloc is statically linked into another shared library which
+  // is repeatedly loaded/unloaded, see issue #281.
+  mi_collect(true /* force */ );
+  #endif
  #endif

  if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) {
@@ -578,9 +582,14 @@ static void mi_process_done(void) {
  if (reason==DLL_PROCESS_ATTACH) {
    mi_process_load();
  }
-  else if (reason==DLL_THREAD_DETACH) {
-    if (!mi_is_redirected()) mi_thread_done();
+  else if (reason==DLL_PROCESS_DETACH) {
+    mi_process_done();
  }
+  else if (reason==DLL_THREAD_DETACH) {
+    if (!mi_is_redirected()) {
+      mi_thread_done();
+    }
+  }
  return TRUE;
}
@@ -599,7 +608,7 @@ static void mi_process_done(void) {
  __pragma(comment(linker, "/include:" "__mi_msvc_initu"))
#endif
#pragma data_seg(".CRT$XIU")
-  extern "C" _mi_crt_callback_t _mi_msvc_initu[] = { &_mi_process_init };
+  mi_decl_externc _mi_crt_callback_t _mi_msvc_initu[] = { &_mi_process_init };
#pragma data_seg()

#elif defined(__cplusplus)
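The new `MI_SKIP_COLLECT_ON_EXIT` define (wired up in the CMakeLists change at the top of this patch) simply compiles out that final forced collect, trading a clean shutdown for a faster exit. A user-level analogue of the same trade-off, as a sketch (not part of this patch; `FULL_CLEANUP` and `my_exit_cleanup` are invented names):

```c
#include <stdlib.h>
#include <mimalloc.h>

static void my_exit_cleanup(void) {
#if defined(FULL_CLEANUP)  // analogous to building *without* MI_SKIP_COLLECT_ON_EXIT
  mi_collect(true);        // free/decommit as much as possible before process exit
#endif
  mi_stats_print(NULL);    // stats can still be printed either way
}

int main(void) {
  atexit(&my_exit_cleanup);
  void* p = mi_malloc(64);
  mi_free(p);
  return 0;
}
```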
diff --git a/src/options.c b/src/options.c
index 24f51f80..bd1a8c1d 100644
--- a/src/options.c
+++ b/src/options.c
@@ -49,54 +49,50 @@ typedef struct mi_option_desc_s {
  mi_init_t init;      // is it initialized yet? (from the environment)
  mi_option_t option;  // for debugging: the option index should match the option
  const char* name;    // option name without `mimalloc_` prefix
+  const char* legacy_name;  // potential legacy v1.x option name
} mi_option_desc_t;

-#define MI_OPTION(opt)      mi_option_##opt, #opt
-#define MI_OPTION_DESC(opt) {0, UNINIT, MI_OPTION(opt) }
+#define MI_OPTION(opt)                mi_option_##opt, #opt, NULL
+#define MI_OPTION_LEGACY(opt,legacy)  mi_option_##opt, #opt, #legacy

static mi_option_desc_t options[_mi_option_last] =
{
  // stable options
-#if MI_DEBUG || defined(MI_SHOW_ERRORS)
+  #if MI_DEBUG || defined(MI_SHOW_ERRORS)
  { 1, UNINIT, MI_OPTION(show_errors) },
-#else
+  #else
  { 0, UNINIT, MI_OPTION(show_errors) },
-#endif
+  #endif
  { 0, UNINIT, MI_OPTION(show_stats) },
  { 0, UNINIT, MI_OPTION(verbose) },

-  // the following options are experimental and not all combinations make sense.
+  // Some of the following options are experimental and not all combinations are valid. Use with care.
  { 1, UNINIT, MI_OPTION(eager_commit) },  // commit per segment directly (8MiB) (but see also `eager_commit_delay`)
-  #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4)  // and other OS's without overcommit?
-  { 0, UNINIT, MI_OPTION(eager_region_commit) },
-  { 0, UNINIT, MI_OPTION(reset_decommits) },  // reset decommits memory
-  #else
-  { 1, UNINIT, MI_OPTION(eager_region_commit) },
-  { 0, UNINIT, MI_OPTION(reset_decommits) },  // legacy; ignored now and reset always uses MADV_FREE/MADV_DONTNEED (issue #518)
-  #endif
+  { 0, UNINIT, MI_OPTION(deprecated_eager_region_commit) },
+  { 0, UNINIT, MI_OPTION(deprecated_reset_decommits) },
  { 0, UNINIT, MI_OPTION(large_os_pages) },  // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
  { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) },  // per 1GiB huge pages
  { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) },  // reserve huge pages at node N
  { 0, UNINIT, MI_OPTION(reserve_os_memory) },
  { 0, UNINIT, MI_OPTION(segment_cache) },  // cache N segments per thread
  { 0, UNINIT, MI_OPTION(page_reset) },  // reset page memory on free
-  { 0, UNINIT, MI_OPTION(abandoned_page_reset) },  // reset free page memory when a thread terminates
-  { 0, UNINIT, MI_OPTION(segment_reset) },  // reset segment memory on free (needs eager commit)
-#if defined(__NetBSD__)
+  { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },  // decommit free page memory when a thread terminates
+  { 0, UNINIT, MI_OPTION(deprecated_segment_reset) },
+  #if defined(__NetBSD__)
  { 0, UNINIT, MI_OPTION(eager_commit_delay) },  // the first N segments per thread are not eagerly committed
-#elif defined(_WIN32)
+  #elif defined(_WIN32)
  { 4, UNINIT, MI_OPTION(eager_commit_delay) },  // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
-#else
+  #else
  { 1, UNINIT, MI_OPTION(eager_commit_delay) },  // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
-#endif
-  { 1, UNINIT, MI_OPTION(allow_decommit) },  // decommit slices when no longer used (after reset_delay milli-seconds)
-  { 25, UNINIT, MI_OPTION(reset_delay) },  // page reset delay in milli-seconds (= decommit)
-  { 500, UNINIT, MI_OPTION(segment_decommit_delay) },  // decommit delay in milli-seconds for freed segments
+  #endif
+  { 25, UNINIT, MI_OPTION_LEGACY(decommit_delay, reset_delay) },  // page decommit delay in milli-seconds
  { 0, UNINIT, MI_OPTION(use_numa_nodes) },  // 0 = use available numa nodes, otherwise use at most N nodes.
  { 0, UNINIT, MI_OPTION(limit_os_alloc) },  // 1 = do not use OS memory for allocation (but only reserved arenas)
  { 100, UNINIT, MI_OPTION(os_tag) },  // only apple specific for now but might serve more or less related purpose
  { 16, UNINIT, MI_OPTION(max_errors) },  // maximum errors that are output
-  { 16, UNINIT, MI_OPTION(max_warnings) }  // maximum warnings that are output
+  { 16, UNINIT, MI_OPTION(max_warnings) },  // maximum warnings that are output
+  { 1, UNINIT, MI_OPTION(allow_decommit) },  // decommit slices when no longer used (after decommit_delay milli-seconds)
+  { 500, UNINIT, MI_OPTION(segment_decommit_delay) }  // decommit delay in milli-seconds for freed segments
};

static void mi_option_init(mi_option_desc_t* desc);
@@ -597,11 +593,21 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) {
static void mi_option_init(mi_option_desc_t* desc) {
  // Read option value from the environment
+  char s[64+1];
  char buf[64+1];
  mi_strlcpy(buf, "mimalloc_", sizeof(buf));
  mi_strlcat(buf, desc->name, sizeof(buf));
-  char s[64+1];
-  if (mi_getenv(buf, s, sizeof(s))) {
+  bool found = mi_getenv(buf,s,sizeof(s));
+  if (!found && desc->legacy_name != NULL) {
+    mi_strlcpy(buf, "mimalloc_", sizeof(buf));
+    mi_strlcat(buf, desc->legacy_name, sizeof(buf));
+    found = mi_getenv(buf,s,sizeof(s));
+    if (found) {
+      _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name );
+    }
+  }
+
+  if (found) {
    size_t len = strlen(s);
    if (len >= sizeof(buf)) len = sizeof(buf) - 1;
    for (size_t i = 0; i < len; i++) {
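The effect of the new `legacy_name` fallback, as a usage sketch (not part of this patch). Note that in a normally linked program the variable must be set before mimalloc initializes its options, e.g. in the shell; the inline `setenv` is only for illustration:

```c
#include <stdio.h>
#include <stdlib.h>
#include <mimalloc.h>

int main(void) {
  // v1.x name: mapped to mi_option_decommit_delay via
  // MI_OPTION_LEGACY(decommit_delay, reset_delay), and mimalloc warns:
  //   environment option "mimalloc_reset_delay" is deprecated -- use "mimalloc_decommit_delay" instead.
  setenv("MIMALLOC_RESET_DELAY", "100", 1);

  void* p = mi_malloc(32);
  printf("decommit_delay = %ld\n", mi_option_get(mi_option_decommit_delay));
  mi_free(p);
  return 0;
}
```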
diff --git a/src/page-queue.c b/src/page-queue.c
index a109df43..92f933c2 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -53,7 +53,7 @@ static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) {
// Returns MI_BIN_HUGE if the size is too large.
// We use `wsize` for the size in "machine word sizes",
// i.e. byte size == `wsize*sizeof(void*)`.
-extern inline uint8_t _mi_bin(size_t size) {
+static inline uint8_t mi_bin(size_t size) {
  size_t wsize = _mi_wsize_from_size(size);
  uint8_t bin;
  if (wsize <= 1) {
@@ -98,6 +98,10 @@ extern inline uint8_t _mi_bin(size_t size) {
  Queue of pages with free blocks
----------------------------------------------------------- */

+uint8_t _mi_bin(size_t size) {
+  return mi_bin(size);
+}
+
size_t _mi_bin_size(uint8_t bin) {
  return _mi_heap_empty.pages[bin].block_size;
}
@@ -105,7 +109,7 @@ size_t _mi_bin_size(uint8_t bin) {
// Good size for allocation
size_t mi_good_size(size_t size) mi_attr_noexcept {
  if (size <= MI_MEDIUM_OBJ_SIZE_MAX) {
-    return _mi_bin_size(_mi_bin(size));
+    return _mi_bin_size(mi_bin(size));
  }
  else {
    return _mi_align_up(size,_mi_os_page_size());
@@ -134,7 +138,7 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t*
#endif

static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
-  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size));
+  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size));
  mi_heap_t* heap = mi_page_heap(page);
  mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL);
  mi_page_queue_t* pq = &heap->pages[bin];
@@ -144,7 +148,7 @@ static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {

static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
-  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size));
+  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : mi_bin(page->xblock_size));
  mi_assert_internal(bin <= MI_BIN_FULL);
  mi_page_queue_t* pq = &heap->pages[bin];
  mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size);
@@ -177,9 +181,9 @@ static inline void mi_heap_queue_first_update(mi_heap_t* heap, const mi_page_que
  }
  else {
    // find previous size; due to minimal alignment upto 3 previous bins may need to be skipped
-    uint8_t bin = _mi_bin(size);
+    uint8_t bin = mi_bin(size);
    const mi_page_queue_t* prev = pq - 1;
-    while( bin == _mi_bin(prev->block_size) && prev > &heap->pages[0]) {
+    while( bin == mi_bin(prev->block_size) && prev > &heap->pages[0]) {
      prev--;
    }
    start = 1 + _mi_wsize_from_size(prev->block_size);
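The `_mi_bin` change follows a common C idiom: keep the hot function as a file-local `static inline` and export a single non-inline wrapper for callers in other translation units, avoiding the diverging C89/GNU vs. C99 semantics of `extern inline`. A generic sketch of the pattern with invented names:

```c
#include <stddef.h>

// hot path: file-local and freely inlinable within this translation unit
static inline unsigned size_bin(size_t size) {
  return (unsigned)(size / 8);  // stand-in for the real binning logic
}

// exported symbol, defined exactly once, for the few external callers
unsigned size_bin_extern(size_t size) {
  return size_bin(size);
}
```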
diff --git a/src/page.c b/src/page.c
index fe1cbe23..2d9a7033 100644
--- a/src/page.c
+++ b/src/page.c
@@ -587,14 +587,17 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)

  // calculate the extend count
  const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size);
  size_t extend = page->reserved - page->capacity;
-  size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize);
-  if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND;
+  mi_assert_internal(extend > 0);
+  size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize);
+  if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; }
+  mi_assert_internal(max_extend > 0);
+
+  if (extend > max_extend) {
    // ensure we don't touch memory beyond the page to reduce page commit.
    // the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%.
-    extend = (max_extend==0 ? 1 : max_extend);
-  }
+    extend = max_extend;
+  }

  mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved);
  mi_assert_internal(extend < (1UL<<16));
@@ -783,7 +786,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex
// that frees the block can free the whole page and segment directly.
static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) {
  size_t block_size = _mi_os_good_alloc_size(size);
-  mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE);
+  mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE);
  bool is_huge = (block_size > MI_LARGE_OBJ_SIZE_MAX);
  mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size));
  mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size);
diff --git a/src/random.c b/src/random.c
index e47946a6..0b44c8b9 100644
--- a/src/random.c
+++ b/src/random.c
@@ -239,7 +239,7 @@ static bool os_random_buf(void* buf, size_t buf_len) {
  if (mi_atomic_load_acquire(&no_getrandom)==0) {
    ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
    if (ret >= 0) return (buf_len == (size_t)ret);
-    if (ret != ENOSYS) return false;
+    if (errno != ENOSYS) return false;
    mi_atomic_store_release(&no_getrandom, 1UL);  // don't call again, and fall back to /dev/urandom
  }
#endif
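The one-character `random.c` fix deserves a note: a raw `syscall()` returns `-1` on failure and reports the error in `errno`, so the old `ret != ENOSYS` test compared a return value against an errno constant. A standalone, Linux-specific sketch of the corrected pattern (not part of this patch):

```c
#define _GNU_SOURCE
#include <errno.h>
#include <stddef.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK 0x0001
#endif

// returns 1 on success, 0 on a transient failure (e.g. EAGAIN), and -1 when
// getrandom is unavailable and the caller should fall back to /dev/urandom
int fill_random(void* buf, size_t len) {
  long ret = syscall(SYS_getrandom, buf, len, GRND_NONBLOCK);
  if (ret >= 0) return ((size_t)ret == len);
  if (errno == ENOSYS) return -1;  // the fixed check: consult errno, not ret
  return 0;
}
```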
diff --git a/src/segment-cache.c b/src/segment-cache.c
index cabdec8f..93908c8f 100644
--- a/src/segment-cache.c
+++ b/src/segment-cache.c
@@ -115,24 +115,26 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo

#define MI_MAX_PURGE_PER_PUSH (4)

-static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld)
+static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld)
{
  MI_UNUSED(tld);
+  if (!mi_option_is_enabled(mi_option_allow_decommit)) return;
  mi_msecs_t now = _mi_clock_now();
-  size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX);  // random start
  size_t purged = 0;
-  for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) {  // probe just N slots
+  const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */);
+  size_t idx = (force ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ );
+  for (size_t visited = 0; visited < max_visits; visited++,idx++) {  // visit N slots
    if (idx >= MI_CACHE_MAX) idx = 0;  // wrap
    mi_cache_slot_t* slot = &cache[idx];
    mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire);
-    if (expire != 0 && now >= expire) {  // racy read
+    if (expire != 0 && (force || now >= expire)) {  // racy read
      // seems expired, first claim it from available
      purged++;
      mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx);
      if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) {  // was available, we claimed it
        expire = mi_atomic_loadi64_acquire(&slot->expire);
-        if (expire != 0 && now >= expire) {  // safe read
+        if (expire != 0 && (force || now >= expire)) {  // safe read
          // still expired, decommit it
          mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
          mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx));
@@ -144,11 +146,15 @@ static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld)
      }
      _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx);  // make it available again for a pop
    }
-    if (purged > MI_MAX_PURGE_PER_PUSH) break;  // bound to no more than N purge tries per push
+    if (!force && purged > MI_MAX_PURGE_PER_PUSH) break;  // bound to no more than N purge tries per push
  }
}

+void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) {
+  mi_segment_cache_purge(force, tld );
+}
+
mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld)
{
#ifdef MI_CACHE_DISABLE
@@ -167,7 +173,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me
  }

  // purge expired entries
-  mi_segment_cache_purge(tld);
+  mi_segment_cache_purge(false /* force? */, tld);

  // find an available slot
  mi_bitmap_index_t bitidx;
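The purge logic above bounds its work on the hot path (a random-start window of `MI_CACHE_FIELDS` slots, at most `MI_MAX_PURGE_PER_PUSH` purges per push) and only scans the whole cache when forced. An illustrative sketch with invented names and sizes:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define CACHE_MAX 256
#define PROBE_MAX  16
#define MAX_PURGE_PER_PUSH 4

static int64_t slot_expire[CACHE_MAX];  // 0 = empty, otherwise expiration time

// decommit one slot if it is expired (or when forced); returns true when purged
static bool try_purge_slot(size_t idx, int64_t now, bool force) {
  int64_t expire = slot_expire[idx];
  if (expire == 0 || (!force && now < expire)) return false;
  slot_expire[idx] = 0;  // the real code decommits the slot's memory here
  return true;
}

void cache_purge(int64_t now, size_t start, bool force) {
  size_t visits = force ? CACHE_MAX : PROBE_MAX;  // scan all vs. a bounded window
  size_t idx = force ? 0 : start % CACHE_MAX;     // forced scans start at slot 0
  size_t purged = 0;
  for (size_t i = 0; i < visits; i++, idx++) {
    if (idx >= CACHE_MAX) idx = 0;                // wrap around
    if (try_purge_slot(idx, now, force)) purged++;
    if (!force && purged > MAX_PURGE_PER_PUSH) break;  // bound hot-path work
  }
}
```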
Such segments are "abandoned" and will be reclaimed by other threads to reuse their pages and/or free them eventually @@ -1065,11 +1064,11 @@ or decommitting segments that have a pending read operation. Note: the current implementation is one possible design; another way might be to keep track of abandoned segments -in the regions. This would have the advantage of keeping +in the arenas/segment_cache's. This would have the advantage of keeping all concurrent code in one place and not needing to deal with ABA issues. The drawback is that it is unclear how to scan abandoned segments efficiently in that case as they -would be spread among all other segments in the regions. +would be spread among all other segments in the arenas. ----------------------------------------------------------- */ // Use the bottom 20-bits (on 64-bit) of the aligned segment pointers @@ -1245,7 +1244,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { } // perform delayed decommits - mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_reset) /* force? */, tld->stats); + mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_decommit) /* force? */, tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); @@ -1431,7 +1430,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_decommit(segment, true, tld->stats); // decommit if needed + mi_segment_delayed_decommit(segment, true /* force? */, tld->stats); // forced decommit if needed as we may not visit soon again mi_abandoned_visited_push(segment); } } @@ -1439,6 +1438,30 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } +void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) +{ + mi_segment_t* segment; + int max_tries = (force ? 16*1024 : 1024); // limit latency + if (force) { + mi_abandoned_visited_revisit(); + } + while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { + mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees) + if (segment->used == 0) { + // free the segment (by forced reclaim) to make it available to other threads. + // note: we could in principle optimize this by skipping reclaim and directly + // freeing but that would violate some invariants temporarily) + mi_segment_reclaim(segment, heap, 0, NULL, tld); + } + else { + // otherwise, decommit if needed and push on the visited list + // note: forced decommit can be expensive if many threads are destroyed/created as in mstress. + mi_segment_delayed_decommit(segment, force, tld->stats); + mi_abandoned_visited_push(segment); + } + } +} + /* ----------------------------------------------------------- Reclaim or allocate ----------------------------------------------------------- */ diff --git a/test/test-stress.c b/test/test-stress.c index 938315be..15df0e3c 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -190,11 +190,13 @@ static void test_stress(void) { free_items(p); } } + #ifndef NDEBUG //mi_collect(false); - //mi_debug_show_arenas(); -#if !defined(NDEBUG) || defined(MI_TSAN) + //mi_debug_show_arenas(); + #endif + #if !defined(NDEBUG) || defined(MI_TSAN) if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } -#endif + #endif } }