diff --git a/readme.md b/readme.md
index 2821802e..9419a658 100644
--- a/readme.md
+++ b/readme.md
@@ -11,7 +11,7 @@
 mimalloc (pronounced "me-malloc")
 is a general purpose allocator with excellent [performance](#performance) characteristics.
 Initially developed by Daan Leijen for the run-time systems of the
 [Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages.
-Latest release:`v1.6.6` (2020-09-24).
+Latest release:`v1.6.7` (2020-09-24).
 It is a drop-in replacement for `malloc` and can be used in other programs
 without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
@@ -73,8 +73,8 @@ Enjoy!
 
 ### Releases
 
-* 2020-09-24, `v1.6.6`: stable release 1.6: using standard C atomics, passing tsan testing, improved
-  handling of failing to commit on Windows, add `mi_process_info` api call.
+* 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved
+  handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call.
 * 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations,
   support for IllumOS and Haiku, NUMA support for Vista/XP, improved NUMA detection for AMD Ryzen, ubsan support.
 * 2020-05-05, `v1.6.3`: stable release 1.6: improved behavior in out-of-memory situations, improved malloc zones on macOS,
diff --git a/src/alloc.c b/src/alloc.c
index 2cef8bcd..8e863a67 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -448,8 +448,7 @@ void mi_free(void* p) mi_attr_noexcept
 #endif
     mi_block_set_next(page, block, page->local_free);
     page->local_free = block;
-    page->used--;
-    if (mi_unlikely(mi_page_all_free(page))) {
+    if (mi_unlikely(--page->used == 0)) {   // using this expression generates better code than: page->used--; if (mi_page_all_free(page))
       _mi_page_retire(page);
     }
   }
diff --git a/src/region.c b/src/region.c
index 8afa80a2..663859c8 100644
--- a/src/region.c
+++ b/src/region.c
@@ -243,7 +243,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo
 static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
 {
   // try all regions for a free slot
-  const size_t count = mi_atomic_load_acquire(&regions_count);
+  const size_t count = mi_atomic_load_relaxed(&regions_count);  // monotonic, so ok to be relaxed
   size_t idx = tld->region_idx;  // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
   for (size_t visited = 0; visited < count; visited++, idx++) {
     if (idx >= count) idx = 0;  // wrap around
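
For context on the `src/alloc.c` hunk: it folds the decrement of `page->used` and the all-free check into a single expression on the free path. Below is a minimal standalone sketch of that pattern, assuming a simplified page structure; `page_t`, `page_retire`, and `free_block_*` are illustrative stand-ins for this example, not mimalloc's real types or functions.

```c
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for mimalloc's page bookkeeping. */
typedef struct page_s {
  size_t used;   /* number of blocks still allocated from this page */
} page_t;

static void page_retire(page_t* page) {
  printf("retiring page %p\n", (void*)page);
}

/* Before the change: decrement, then test `used` separately
   (the real code tests via mi_page_all_free). */
static void free_block_old(page_t* page) {
  page->used--;
  if (page->used == 0) {
    page_retire(page);
  }
}

/* After the change: fold the decrement into the zero test, which per the
   commit's own comment lets the compiler generate better code. */
static void free_block_new(page_t* page) {
  if (--page->used == 0) {
    page_retire(page);
  }
}

int main(void) {
  page_t a = { .used = 2 };
  free_block_new(&a);   /* used: 2 -> 1, no retire */
  free_block_new(&a);   /* used: 1 -> 0, page retired */

  page_t b = { .used = 1 };
  free_block_old(&b);   /* used: 1 -> 0, page retired */
  return 0;
}
```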
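Similarly, for the `src/region.c` hunk: `regions_count` only ever grows, so a relaxed load that observes a stale (smaller) value simply means the current pass scans fewer regions. The following is a hedged C11 sketch of that idea using standard `atomic_load_explicit`/`atomic_fetch_add_explicit` rather than mimalloc's `mi_atomic_*` wrappers; the counter and function names are made up for the example.

```c
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-in for a monotonically growing region counter. */
static _Atomic size_t region_count_demo;

static void publish_region(void) {
  /* A newly allocated region becomes visible by bumping the count. */
  atomic_fetch_add_explicit(&region_count_demo, 1, memory_order_release);
}

static size_t snapshot_count(void) {
  /* The counter is monotonic, and a stale (smaller) value only means this
     pass scans fewer regions; per-region state is synchronized separately
     in mimalloc, so a relaxed load suffices and no acquire is needed here. */
  return atomic_load_explicit(&region_count_demo, memory_order_relaxed);
}

int main(void) {
  publish_region();
  publish_region();
  printf("visible regions: %zu\n", snapshot_count());
  return 0;
}
```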