diff --git a/docs/overrides.html b/docs/overrides.html
index 0e7fd0ec..fc0ad591 100644
--- a/docs/overrides.html
+++ b/docs/overrides.html
@@ -118,7 +118,7 @@ $(document).ready(function(){initNavTree('overrides.html',''); initResizable();
Windows
Overriding on Windows is robust and has the particular advantage of being able to redirect all malloc/free calls that go through the (dynamic) C runtime allocator, including those from other DLLs or libraries.
Overriding on Windows requires that you link your program explicitly with the mimalloc DLL and use the C runtime library as a DLL (using the /MD or /MDd switch). Also, the mimalloc-redirect.dll (or mimalloc-redirect32.dll) must be available in the same folder as the main mimalloc-override.dll at runtime (as it is a dependency). The redirection DLL ensures that all calls to the C runtime malloc API get redirected to mimalloc (in mimalloc-override.dll).
-To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the main function, like mi_version() (or use the /INCLUDE:mi_version switch on the linker). See the mimalloc-override-test project for an example on how to use this. For best performance on Windows with C++, it is also recommended to also override the new/delete operations (by including mimalloc-new-delete.h in a single(!) source file in your project).
+To ensure the mimalloc DLL is loaded at run-time, it is easiest to insert a call to the mimalloc API in the main function, like mi_version() (or use the /INCLUDE:mi_version linker switch). See the mimalloc-override-test project for an example of how to use this. For best performance on Windows with C++, it is also recommended to override the new/delete operations (by including mimalloc-new-delete.h in a single(!) source file in your project, without also linking to the mimalloc library).
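For illustration, a minimal sketch of this setup (the call to mi_version() exists only to force the mimalloc DLL to be loaded so redirection takes effect; compile as C++ and link against the mimalloc DLL as described above):

#include <mimalloc.h>
#include <mimalloc-new-delete.h>  // C++: include in exactly one source file, and do not also link the mimalloc library

int main() {
  mi_version();   // any mimalloc API call works; it just ensures mimalloc-override.dll is actually loaded
  // ... program code: malloc/free and new/delete are now serviced by mimalloc ...
  return 0;
}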
The environment variable MIMALLOC_DISABLE_REDIRECT=1 can be used to disable dynamic overriding at run-time. Use MIMALLOC_VERBOSE=1 to check if mimalloc was successfully redirected.
(Note: in principle, it is possible to even patch existing executables without any recompilation if they are linked with the dynamic C runtime (ucrtbase.dll) – just put the mimalloc-override.dll into the import table (and put mimalloc-redirect.dll in the same folder). Such patching can be done, for example, with CFF Explorer.)
Static override
diff --git a/docs/using.html b/docs/using.html
index e6aad1a2..140f0c5c 100644
--- a/docs/using.html
+++ b/docs/using.html
@@ -105,7 +105,7 @@ $(document).ready(function(){initNavTree('using.html',''); initResizable(); });
to link with the shared (dynamic) library, or:
target_link_libraries(myapp PUBLIC mimalloc-static)
to link with the static library. See test\CMakeLists.txt for an example.
C++
-For best performance in C++ programs, it is also recommended to override the global new and delete operators. For convience, mimalloc provides mimalloc-new-delete.h which does this for you – just include it in a single(!) source file in your project.
+For best performance in C++ programs, it is also recommended to override the global new and delete operators. For convenience, mimalloc provides mimalloc-new-delete.h which does this for you – just include it in a single(!) source file in your project, without also linking to the mimalloc library.
In C++, mimalloc also provides the mi_stl_allocator struct which implements the std::allocator interface. For example:
std::vector<some_struct, mi_stl_allocator<some_struct>> vec;
vec.push_back(some_struct());
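As a rough sketch of how these pieces fit together in one translation unit (the struct and main function here are illustrative; mimalloc-new-delete.h must appear in exactly one source file of the project):

#include <mimalloc.h>
#include <mimalloc-new-delete.h>   // overrides the global new/delete operators
#include <vector>

struct some_struct { int value; };

int main() {
  std::vector<some_struct, mi_stl_allocator<some_struct>> vec;  // vector storage is allocated by mimalloc
  vec.push_back(some_struct());
  some_struct* p = new some_struct();   // served by mimalloc via the overridden operator new
  delete p;
  return 0;
}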
Statistics
diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h
index e07df84d..7ad5da58 100644
--- a/include/mimalloc-atomic.h
+++ b/include/mimalloc-atomic.h
@@ -23,10 +23,15 @@ terms of the MIT license. A copy of the license can be found in the file
#define _Atomic(tp) std::atomic<tp>
#define mi_atomic(name) std::atomic_##name
#define mi_memory_order(name) std::memory_order_##name
+#if !defined(ATOMIC_VAR_INIT) || (__cplusplus >= 202002L) // c++20, see issue #571
+ #define MI_ATOMIC_VAR_INIT(x) x
+#else
+ #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
+#endif
#elif defined(_MSC_VER)
// Use MSVC C wrapper for C11 atomics
#define _Atomic(tp) tp
-#define ATOMIC_VAR_INIT(x) x
+#define MI_ATOMIC_VAR_INIT(x) x
#define mi_atomic(name) mi_atomic_##name
#define mi_memory_order(name) mi_memory_order_##name
#else
@@ -34,6 +39,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include <stdatomic.h>
#define mi_atomic(name) atomic_##name
#define mi_memory_order(name) memory_order_##name
+#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
#endif
// Various defines for all used memory orders in mimalloc
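A small sketch of what the new macro buys (the variable and helper names here are illustrative): the same file-scope initializer compiles under C11, under the MSVC C wrapper, and under C++20 where ATOMIC_VAR_INIT is deprecated:

#include "mimalloc-atomic.h"   // internal header; _Atomic(T) maps to std::atomic<T> when compiled as C++

static _Atomic(size_t) my_counter = MI_ATOMIC_VAR_INIT(0);   // no #ifdef needed at the use site

static void my_count_one(void) {
  mi_atomic_add_acq_rel(&my_counter, 1);
}

static size_t my_count(void) {
  return mi_atomic_load_relaxed(&my_counter);
}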
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index f793fcb2..3a78f355 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -154,8 +154,8 @@ mi_msecs_t _mi_clock_start(void);
// "alloc.c"
void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic`
-void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero);
-void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero);
+void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
+void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
bool _mi_free_delayed_block(mi_block_t* block);
void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size);
@@ -1043,7 +1043,7 @@ static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE);
- memcpy(adst, asrc, n);
+ _mi_memcpy(adst, asrc, n);
}
#else
// Default fallback on `_mi_memcpy`
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 94bb5d30..5a486c65 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -614,9 +614,6 @@ typedef struct mi_segments_tld_s {
size_t peak_count; // peak number of segments
size_t current_size; // current size of all segments
size_t peak_size; // peak size of all segments
- size_t cache_count; // number of segments in the cache
- size_t cache_size; // total size of all segments in the cache
- mi_segment_t* cache; // (small) cache of segments
mi_stats_t* stats; // points to tld stats
mi_os_tld_t* os; // points to os stats
} mi_segments_tld_t;
diff --git a/include/mimalloc.h b/include/mimalloc.h
index 5e4c3743..58ecaa67 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -256,6 +256,7 @@ typedef struct mi_heap_area_s {
size_t committed; // current available bytes for this area
size_t used; // number of allocated blocks
size_t block_size; // size in bytes of each block
+ size_t full_block_size; // size in bytes of a full block including padding and metadata.
} mi_heap_area_t;
typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
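A hedged sketch of a visitor that uses the new field (the test_heap_walk test added further down in this diff exercises the same API); the block_size argument is the usable block size, while full_block_size also counts padding and metadata:

#include <mimalloc.h>
#include <stdio.h>

static bool print_block(const mi_heap_t* heap, const mi_heap_area_t* area,
                        void* block, size_t block_size, void* arg) {
  (void)heap; (void)arg;
  if (block == NULL) {   // called once per heap area
    printf("area: usable block size %zu, full block size %zu\n", area->block_size, area->full_block_size);
  }
  else {
    printf("  block %p of usable size %zu\n", block, block_size);
  }
  return true;   // continue visiting
}

// usage (illustrative): mi_heap_visit_blocks(mi_heap_get_default(), true, &print_block, NULL);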
@@ -315,7 +316,7 @@ typedef enum mi_option_e {
mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup
mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node
mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup
- mi_option_segment_cache,
+ mi_option_deprecated_segment_cache,
mi_option_page_reset,
mi_option_abandoned_page_decommit,
mi_option_deprecated_segment_reset,
diff --git a/src/alloc-override.c b/src/alloc-override.c
index 12e9e0d6..e29cb4b2 100644
--- a/src/alloc-override.c
+++ b/src/alloc-override.c
@@ -166,8 +166,8 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
- void operator delete (void* p, std::align_val_t al, const std::nothrow_t& tag) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
- void operator delete[](void* p, std::align_val_t al, const std::nothrow_t& tag) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
+ void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
+ void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
diff --git a/src/alloc-posix.c b/src/alloc-posix.c
index ee5babe1..176e7ec3 100644
--- a/src/alloc-posix.c
+++ b/src/alloc-posix.c
@@ -32,17 +32,17 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
-size_t mi_malloc_size(const void* p) mi_attr_noexcept {
+mi_decl_nodiscard size_t mi_malloc_size(const void* p) mi_attr_noexcept {
//if (!mi_is_in_heap_region(p)) return 0;
return mi_usable_size(p);
}
-size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept {
+mi_decl_nodiscard size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept {
//if (!mi_is_in_heap_region(p)) return 0;
return mi_usable_size(p);
}
-size_t mi_malloc_good_size(size_t size) mi_attr_noexcept {
+mi_decl_nodiscard size_t mi_malloc_good_size(size_t size) mi_attr_noexcept {
return mi_good_size(size);
}
@@ -65,24 +65,24 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept
return 0;
}
-mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept {
+mi_decl_nodiscard mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept {
void* p = mi_malloc_aligned(size, alignment);
mi_assert_internal(((uintptr_t)p % alignment) == 0);
return p;
}
-mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept {
+mi_decl_nodiscard mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept {
return mi_memalign( _mi_os_page_size(), size );
}
-mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept {
+mi_decl_nodiscard mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept {
size_t psize = _mi_os_page_size();
if (size >= SIZE_MAX - psize) return NULL; // overflow
size_t asize = _mi_align_up(size, psize);
return mi_malloc_aligned(asize, psize);
}
-mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept {
+mi_decl_nodiscard mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept {
if (mi_unlikely((size&(alignment-1)) != 0)) { // C11 requires alignment>0 && integral multiple, see
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "(mi_)aligned_alloc requires the size to be an integral multiple of the alignment (size %zu, alignment %zu)\n", size, alignment);
@@ -95,13 +95,13 @@ mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_n
return p;
}
-void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD
+mi_decl_nodiscard void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD
void* newp = mi_reallocn(p,count,size);
if (newp==NULL) { errno = ENOMEM; }
return newp;
}
-int mi_reallocarr( void* p, size_t count, size_t size ) mi_attr_noexcept { // NetBSD
+mi_decl_nodiscard int mi_reallocarr( void* p, size_t count, size_t size ) mi_attr_noexcept { // NetBSD
mi_assert(p != NULL);
if (p == NULL) {
errno = EINVAL;
@@ -120,7 +120,7 @@ void* mi__expand(void* p, size_t newsize) mi_attr_noexcept { // Microsoft
return res;
}
-mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept {
+mi_decl_nodiscard mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept {
if (s==NULL) return NULL;
size_t len;
for(len = 0; s[len] != 0; len++) { }
@@ -132,7 +132,7 @@ mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noex
return p;
}
-mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept {
+mi_decl_nodiscard mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept {
return (unsigned char*)mi_strdup((const char*)s);
}
@@ -172,10 +172,10 @@ int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name)
#endif
}
-void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { // Microsoft
+mi_decl_nodiscard void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { // Microsoft
return mi_recalloc_aligned_at(p, newcount, size, alignment, offset);
}
-void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { // Microsoft
+mi_decl_nodiscard void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { // Microsoft
return mi_recalloc_aligned(p, newcount, size, alignment);
}
diff --git a/src/alloc.c b/src/alloc.c
index 92f04daf..d7e8aac8 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -165,7 +165,7 @@ mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept {
return p;
}
-void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) {
+void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
void* p = mi_heap_malloc(heap,size);
if (zero && p != NULL) {
_mi_block_zero_init(_mi_ptr_page(p),p,size); // todo: can we avoid getting the page again?
@@ -613,20 +613,25 @@ bool _mi_free_delayed_block(mi_block_t* block) {
}
// Bytes available in a block
-static size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
- const mi_segment_t* const segment = mi_checked_ptr_segment(p,msg);
- if (segment==NULL) return 0;
- const mi_page_t* const page = _mi_segment_page_of(segment, p);
- const mi_block_t* block = (const mi_block_t*)p;
- if (mi_unlikely(mi_page_has_aligned(page))) {
- block = _mi_page_ptr_unalign(segment, page, p);
- size_t size = mi_page_usable_size_of(page, block);
- ptrdiff_t const adjust = (uint8_t*)p - (uint8_t*)block;
- mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
- return (size - adjust);
+mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept {
+ const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p);
+ const size_t size = mi_page_usable_size_of(page, block);
+ const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block;
+ mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
+ return (size - adjust);
+}
+
+static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
+ const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
+ if (segment==NULL) return 0; // also returns 0 if `p == NULL`
+ const mi_page_t* const page = _mi_segment_page_of(segment, p);
+ if (mi_likely(!mi_page_has_aligned(page))) {
+ const mi_block_t* block = (const mi_block_t*)p;
+ return mi_page_usable_size_of(page, block);
}
else {
- return mi_page_usable_size_of(page, block);
+ // split out to separate routine for improved code generation
+ return mi_page_usable_aligned_size_of(segment, page, p);
}
}
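Seen through the public API the behavior is unchanged; a small usage sketch under stated assumptions (the helper name is illustrative, and the exact usable sizes depend on the size classes):

#include <mimalloc.h>
#include <assert.h>

static void usable_size_example(void) {
  void* p = mi_malloc(24);
  assert(mi_usable_size(p) >= 24);      // fast path: page without aligned blocks
  void* q = mi_malloc_aligned(24, 64);
  assert(mi_usable_size(q) >= 24);      // aligned path, now split into mi_page_usable_aligned_size_of
  assert(mi_usable_size(NULL) == 0);    // NULL yields 0, as noted above
  mi_free(p);
  mi_free(q);
}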
@@ -695,40 +700,49 @@ mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept {
return mi_heap_mallocn(mi_get_default_heap(),count,size);
}
-// Expand in place or fail
+// Expand (or shrink) in place (or fail)
void* mi_expand(void* p, size_t newsize) mi_attr_noexcept {
+ #if MI_PADDING
+ // we do not shrink/expand with padding enabled
+ MI_UNUSED(p); MI_UNUSED(newsize);
+ return NULL;
+ #else
if (p == NULL) return NULL;
- size_t size = _mi_usable_size(p,"mi_expand");
+ const size_t size = _mi_usable_size(p,"mi_expand");
if (newsize > size) return NULL;
return p; // it fits
+ #endif
}
-void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) {
- if (p == NULL) return _mi_heap_malloc_zero(heap,newsize,zero);
- size_t size = _mi_usable_size(p,"mi_realloc");
- if (newsize <= size && newsize >= (size / 2)) {
+void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept {
+ const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL
+ if (mi_unlikely(newsize <= size && newsize >= (size / 2))) {
+ // todo: adjust potential padding to reflect the new size?
return p; // reallocation still fits and not more than 50% waste
}
void* newp = mi_heap_malloc(heap,newsize);
if (mi_likely(newp != NULL)) {
if (zero && newsize > size) {
// also set last word in the previous allocation to zero to ensure any padding is zero-initialized
- size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
+ const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
memset((uint8_t*)newp + start, 0, newsize - start);
}
- if (mi_likely((uintptr_t)p % MI_INTPTR_SIZE == 0)) {
- _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize));
+ if (p != NULL) {
+ const size_t copysize = (newsize > size ? size : newsize);
+ if (mi_likely(((uintptr_t)p % MI_INTPTR_SIZE) == 0)) {
+ _mi_memcpy_aligned(newp, p, copysize);
+ }
+ else {
+ _mi_memcpy(newp, p, copysize);
+ }
+ mi_free(p); // only free the original pointer if successful
}
- else {
- _mi_memcpy(newp, p, (newsize > size ? size : newsize));
- }
- mi_free(p); // only free if successful
}
return newp;
}
void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept {
- return _mi_heap_realloc_zero(heap, p, newsize, false);
+ return _mi_heap_realloc_zero(heap, p, newsize, false);
}
void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept {
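A brief usage sketch of the reallocation behavior implemented above (the pointer outcomes are likely, not guaranteed; with MI_PADDING enabled, mi_expand always returns NULL as noted in the #if above):

#include <mimalloc.h>

static void realloc_example(void) {
  void* p = mi_malloc(100);
  void* q = mi_realloc(p, 80);    // newsize fits and wastes less than 50%: likely returns p unchanged
  void* r = mi_expand(q, 200);    // never moves: NULL unless the existing block can already hold 200 bytes
  (void)r;
  mi_free(q);
}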
diff --git a/src/heap.c b/src/heap.c
index 4fdfb0b9..816d961a 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -481,13 +481,14 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
if (page->used == 0) return true;
const size_t bsize = mi_page_block_size(page);
+ const size_t ubsize = mi_page_usable_block_size(page); // without padding
size_t psize;
uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize);
if (page->capacity == 1) {
// optimize page with one block
mi_assert_internal(page->used == 1 && page->free == NULL);
- return visitor(mi_page_heap(page), area, pstart, bsize, arg);
+ return visitor(mi_page_heap(page), area, pstart, ubsize, arg);
}
// create a bitmap of free blocks.
@@ -521,7 +522,7 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
else if ((m & ((uintptr_t)1 << bit)) == 0) {
used_count++;
uint8_t* block = pstart + (i * bsize);
- if (!visitor(mi_page_heap(page), area, block, bsize, arg)) return false;
+ if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false;
}
}
mi_assert_internal(page->used == used_count);
@@ -537,12 +538,14 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa
mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun;
mi_heap_area_ex_t xarea;
const size_t bsize = mi_page_block_size(page);
+ const size_t ubsize = mi_page_usable_block_size(page);
xarea.page = page;
xarea.area.reserved = page->reserved * bsize;
xarea.area.committed = page->capacity * bsize;
xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL);
- xarea.area.used = page->used;
- xarea.area.block_size = bsize;
+ xarea.area.used = page->used * bsize;
+ xarea.area.block_size = ubsize;
+ xarea.area.full_block_size = bsize;
return fun(heap, &xarea, arg);
}
diff --git a/src/init.c b/src/init.c
index bf3f6d3e..685a6a84 100644
--- a/src/init.c
+++ b/src/init.c
@@ -25,8 +25,8 @@ const mi_page_t _mi_page_empty = {
0, // used
0, // xblock_size
NULL, // local_free
- ATOMIC_VAR_INIT(0), // xthread_free
- ATOMIC_VAR_INIT(0), // xheap
+ MI_ATOMIC_VAR_INIT(0), // xthread_free
+ MI_ATOMIC_VAR_INIT(0), // xheap
NULL, NULL
#if MI_INTPTR_SIZE==8
, { 0 } // padding
@@ -99,7 +99,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
NULL,
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
- ATOMIC_VAR_INIT(NULL),
+ MI_ATOMIC_VAR_INIT(NULL),
0, // tid
0, // cookie
{ 0, 0 }, // keys
@@ -117,7 +117,7 @@ mi_decl_cache_align static const mi_tld_t tld_empty = {
0,
false,
NULL, NULL,
- { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, tld_empty_stats, tld_empty_os }, // segments
+ { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments
{ 0, tld_empty_stats }, // os
{ MI_STATS_NULL } // stats
};
@@ -130,7 +130,7 @@ extern mi_heap_t _mi_heap_main;
static mi_tld_t tld_main = {
0, false,
&_mi_heap_main, & _mi_heap_main,
- { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, 0, NULL, &tld_main.stats, &tld_main.os }, // segments
+ { MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments
{ 0, &tld_main.stats }, // os
{ MI_STATS_NULL } // stats
};
@@ -139,7 +139,7 @@ mi_heap_t _mi_heap_main = {
&tld_main,
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
- ATOMIC_VAR_INIT(NULL),
+ MI_ATOMIC_VAR_INIT(NULL),
0, // thread id
0, // initial cookie
{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
@@ -181,6 +181,68 @@ typedef struct mi_thread_data_s {
mi_tld_t tld;
} mi_thread_data_t;
+
+// Thread meta-data is allocated directly from the OS. For
+// some programs that do not use thread pools and allocate and
+// destroy many OS threads, this may cause too much overhead
+// per thread, so we maintain a small cache of recently freed metadata.
+
+#define TD_CACHE_SIZE (8)
+static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE];
+
+static mi_thread_data_t* mi_thread_data_alloc(void) {
+ // try to find thread metadata in the cache
+ mi_thread_data_t* td;
+ for (int i = 0; i < TD_CACHE_SIZE; i++) {
+ td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
+ if (td != NULL) {
+ td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
+ if (td != NULL) {
+ return td;
+ }
+ }
+ }
+ // if that fails, allocate directly from the OS
+ td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
+ if (td == NULL) {
+ // if this fails, try once more. (issue #257)
+ td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
+ if (td == NULL) {
+ // really out of memory
+ _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
+ }
+ }
+ return td;
+}
+
+static void mi_thread_data_free( mi_thread_data_t* tdfree ) {
+ // try to add the thread metadata to the cache
+ for (int i = 0; i < TD_CACHE_SIZE; i++) {
+ mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
+ if (td == NULL) {
+ mi_thread_data_t* expected = NULL;
+ if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) {
+ return;
+ }
+ }
+ }
+ // if that fails, just free it directly
+ _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main);
+}
+
+static void mi_thread_data_collect(void) {
+ // free all thread metadata from the cache
+ for (int i = 0; i < TD_CACHE_SIZE; i++) {
+ mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
+ if (td != NULL) {
+ td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
+ if (td != NULL) {
+ _mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main );
+ }
+ }
+ }
+}
+
// Initialize the thread local default heap, called from `mi_thread_init`
static bool _mi_heap_init(void) {
if (mi_heap_is_initialized(mi_get_default_heap())) return true;
@@ -193,16 +255,9 @@ static bool _mi_heap_init(void) {
}
else {
// use `_mi_os_alloc` to allocate directly from the OS
- mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); // Todo: more efficient allocation?
- if (td == NULL) {
- // if this fails, try once more. (issue #257)
- td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
- if (td == NULL) {
- // really out of memory
- _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
- return false;
- }
- }
+ mi_thread_data_t* td = mi_thread_data_alloc();
+ if (td == NULL) return false;
+
// OS allocated so already zero initialized
mi_tld_t* tld = &td->tld;
mi_heap_t* heap = &td->heap;
@@ -262,16 +317,17 @@ static bool _mi_heap_done(mi_heap_t* heap) {
// as abandoned: one may allocate it in one thread, but deallocate in another in which case
// the count can be too large or negative. todo: perhaps not count huge segments? see issue #363
// mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id());
- _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main);
+ mi_thread_data_free((mi_thread_data_t*)heap);
}
-#if 0
- // never free the main thread even in debug mode; if a dll is linked statically with mimalloc,
- // there may still be delete/free calls after the mi_fls_done is called. Issue #207
else {
+ mi_thread_data_collect(); // free cached thread metadata
+ #if 0
+ // never free the main thread even in debug mode; if a dll is linked statically with mimalloc,
+ // there may still be delete/free calls after the mi_fls_done is called. Issue #207
_mi_heap_destroy_pages(heap);
mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main);
+ #endif
}
-#endif
return false;
}
@@ -345,7 +401,7 @@ bool _mi_is_main_thread(void) {
return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id());
}
-static _Atomic(size_t) thread_count = ATOMIC_VAR_INIT(1);
+static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1);
size_t _mi_current_thread_count(void) {
return mi_atomic_load_relaxed(&thread_count);
@@ -422,7 +478,7 @@ bool _mi_preloading(void) {
return os_preloading;
}
-bool mi_is_redirected(void) mi_attr_noexcept {
+mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept {
return mi_redirected;
}
diff --git a/src/options.c b/src/options.c
index d7f4c29f..f0111b63 100644
--- a/src/options.c
+++ b/src/options.c
@@ -74,7 +74,7 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages
{ -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N
{ 0, UNINIT, MI_OPTION(reserve_os_memory) },
- { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
+ { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread
{ 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free
{ 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates
{ 0, UNINIT, MI_OPTION(deprecated_segment_reset) },
@@ -116,6 +116,7 @@ void _mi_options_init(void) {
mi_decl_nodiscard long mi_option_get(mi_option_t option) {
mi_assert(option >= 0 && option < _mi_option_last);
+ if (option < 0 || option >= _mi_option_last) return 0;
mi_option_desc_t* desc = &options[option];
mi_assert(desc->option == option); // index should match the option
if (mi_unlikely(desc->init == UNINIT)) {
@@ -126,6 +127,7 @@ mi_decl_nodiscard long mi_option_get(mi_option_t option) {
void mi_option_set(mi_option_t option, long value) {
mi_assert(option >= 0 && option < _mi_option_last);
+ if (option < 0 || option >= _mi_option_last) return;
mi_option_desc_t* desc = &options[option];
mi_assert(desc->option == option); // index should match the option
desc->value = value;
@@ -134,6 +136,7 @@ void mi_option_set(mi_option_t option, long value) {
void mi_option_set_default(mi_option_t option, long value) {
mi_assert(option >= 0 && option < _mi_option_last);
+ if (option < 0 || option >= _mi_option_last) return;
mi_option_desc_t* desc = &options[option];
if (desc->init != INITIALIZED) {
desc->value = value;
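The added range checks turn an out-of-range option id into a no-op in release builds (the assertion still fires in debug builds). A small sketch of the option API these functions back, using options declared in mimalloc.h:

#include <mimalloc.h>
#include <stdio.h>

static void options_example(void) {
  mi_option_set(mi_option_show_errors, 1);                    // enable error messages
  long n = mi_option_get(mi_option_reserve_huge_os_pages);    // read back an option value
  printf("reserve_huge_os_pages = %ld\n", n);
  mi_option_set((mi_option_t)1000, 1);                        // out of range: now ignored instead of writing out of bounds
}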
diff --git a/src/os.c b/src/os.c
index 4a597632..9d3bbb1a 100644
--- a/src/os.c
+++ b/src/os.c
@@ -97,7 +97,7 @@ bool _mi_os_has_overcommit(void) {
}
// OS (small) page size
-size_t _mi_os_page_size() {
+size_t _mi_os_page_size(void) {
return os_page_size;
}
@@ -130,26 +130,47 @@ size_t _mi_os_good_alloc_size(size_t size) {
// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016.
// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility)
// NtAllocateVirtualMemoryEx is used for huge OS page allocation (1GiB)
-//
-// We hide MEM_EXTENDED_PARAMETER to compile with older SDK's.
+// We define a minimal MEM_EXTENDED_PARAMETER ourselves to be able to compile with older SDKs.
+typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E {
+ MiMemExtendedParameterInvalidType = 0,
+ MiMemExtendedParameterAddressRequirements,
+ MiMemExtendedParameterNumaNode,
+ MiMemExtendedParameterPartitionHandle,
+ MiMemExtendedParameterUserPhysicalHandle,
+ MiMemExtendedParameterAttributeFlags,
+ MiMemExtendedParameterMax
+} MI_MEM_EXTENDED_PARAMETER_TYPE;
+
+typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S {
+ struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type;
+ union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg;
+} MI_MEM_EXTENDED_PARAMETER;
+
+typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S {
+ PVOID LowestStartingAddress;
+ PVOID HighestEndingAddress;
+ SIZE_T Alignment;
+} MI_MEM_ADDRESS_REQUIREMENTS;
+
+#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010
+
#include <winternl.h>
-typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG);
-typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG);
+typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG);
+typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG);
static PVirtualAlloc2 pVirtualAlloc2 = NULL;
static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL;
// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7
-#if (_WIN32_WINNT < 0x601) // before Win7
-typedef struct _PROCESSOR_NUMBER { WORD Group; BYTE Number; BYTE Reserved; } PROCESSOR_NUMBER, *PPROCESSOR_NUMBER;
-#endif
-typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(PPROCESSOR_NUMBER ProcNumber);
-typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(PPROCESSOR_NUMBER Processor, PUSHORT NodeNumber);
+typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER;
+
+typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber);
+typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber);
typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask);
static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL;
static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL;
static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL;
-static bool mi_win_enable_large_os_pages()
+static bool mi_win_enable_large_os_pages(void)
{
if (large_os_page_size > 0) return true;
@@ -220,7 +241,7 @@ void _mi_os_init(void)
}
}
#elif defined(__wasi__)
-void _mi_os_init() {
+void _mi_os_init(void) {
os_overcommit = false;
os_page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB
os_alloc_granularity = 16;
@@ -251,7 +272,7 @@ static void os_detect_overcommit(void) {
#endif
}
-void _mi_os_init() {
+void _mi_os_init(void) {
// get the page size
long result = sysconf(_SC_PAGESIZE);
if (result > 0) {
@@ -276,7 +297,57 @@ static int mi_madvise(void* addr, size_t length, int advice) {
/* -----------------------------------------------------------
- free memory
+ aligned hinting
+-------------------------------------------------------------- */
+
+// On 64-bit systems, we can do efficient aligned allocation by using
+// the 2TiB to 30TiB area to allocate those.
+#if (MI_INTPTR_SIZE >= 8)
+static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
+
+// Return a MI_SEGMENT_SIZE aligned address that is probably available.
+// If this returns NULL, the OS will determine the address but on some OS's that may not be
+// properly aligned which can be more costly as it needs to be adjusted afterwards.
+// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
+// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
+// in the middle of the 2TiB - 6TiB address range (see issue #372))
+
+#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start
+#define MI_HINT_AREA ((uintptr_t)4 << 40) // up to 6TiB (since before win8 there is "only" 8TiB available to processes)
+#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
+
+static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size)
+{
+ if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
+ size = _mi_align_up(size, MI_SEGMENT_SIZE);
+ if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
+ #if (MI_SECURE>0)
+ size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
+ #endif
+
+ uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
+ if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize
+ uintptr_t init = MI_HINT_BASE;
+ #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
+ uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
+ init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB
+ #endif
+ uintptr_t expected = hint + size;
+ mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
+ hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
+ }
+ if (hint%try_alignment != 0) return NULL;
+ return (void*)hint;
+}
+#else
+static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
+ MI_UNUSED(try_alignment); MI_UNUSED(size);
+ return NULL;
+}
+#endif
+
+/* -----------------------------------------------------------
+ Free memory
-------------------------------------------------------------- */
static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats)
@@ -315,9 +386,6 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats
return !err;
}
-#if !(defined(__wasi__) || defined(MI_USE_SBRK) || defined(MAP_ALIGNED))
-static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size);
-#endif
/* -----------------------------------------------------------
Raw allocation on Windows (VirtualAlloc)
@@ -340,20 +408,18 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
}
}
#endif
-#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
// on modern Windows try use VirtualAlloc2 for aligned allocation
if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
- MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 };
+ MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 };
reqs.Alignment = try_alignment;
- MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} };
- param.Type = MemExtendedParameterAddressRequirements;
- param.Pointer = &reqs;
+ MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} };
+ param.Type.Type = MiMemExtendedParameterAddressRequirements;
+ param.Arg.Pointer = &reqs;
void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, &param, 1);
if (p != NULL) return p;
_mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags);
// fall through on error
}
-#endif
// last resort
return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
}
@@ -630,53 +696,6 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
}
#endif
-// On 64-bit systems, we can do efficient aligned allocation by using
-// the 2TiB to 30TiB area to allocate them.
-#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || defined(MI_OS_USE_MMAP))
-static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
-
-// Return a 4MiB aligned address that is probably available.
-// If this returns NULL, the OS will determine the address but on some OS's that may not be
-// properly aligned which can be more costly as it needs to be adjusted afterwards.
-// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
-// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
-// in the middle of the 2TiB - 6TiB address range (see issue #372))
-
-#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start
-#define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes)
-#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
-
-static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size)
-{
- if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
- size = _mi_align_up(size, MI_SEGMENT_SIZE);
- if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
- #if (MI_SECURE>0)
- size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
- #endif
-
- uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
- if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize
- uintptr_t init = MI_HINT_BASE;
- #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
- uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
- init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB
- #endif
- uintptr_t expected = hint + size;
- mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
- hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
- }
- if (hint%try_alignment != 0) return NULL;
- return (void*)hint;
-}
-#elif defined(__wasi__) || defined(MI_USE_SBRK) || defined(MAP_ALIGNED)
-// no need for mi_os_get_aligned_hint
-#else
-static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
- MI_UNUSED(try_alignment); MI_UNUSED(size);
- return NULL;
-}
-#endif
/* -----------------------------------------------------------
Primitive allocation from the OS.
@@ -777,6 +796,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
return p;
}
+
/* -----------------------------------------------------------
OS API: alloc, free, alloc_aligned
----------------------------------------------------------- */
@@ -804,6 +824,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats) {
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats)
{
+ MI_UNUSED(&mi_os_get_aligned_hint); // suppress unused warnings
MI_UNUSED(tld_stats);
if (size == 0) return NULL;
size = _mi_os_good_alloc_size(size);
@@ -983,7 +1004,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
if (p != start) return false;
#else
#if defined(MADV_FREE)
- static _Atomic(size_t) advice = ATOMIC_VAR_INIT(MADV_FREE);
+ static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE);
int oadvice = (int)mi_atomic_load_relaxed(&advice);
int err;
while ((err = mi_madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0; };
@@ -1105,21 +1126,17 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
mi_win_enable_large_os_pages();
- #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
- MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} };
+ MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} };
// on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
static bool mi_huge_pages_available = true;
if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) {
- #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE
- #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10)
- #endif
- params[0].Type = 5; // == MemExtendedParameterAttributeFlags;
- params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE;
+ params[0].Type.Type = MiMemExtendedParameterAttributeFlags;
+ params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE;
ULONG param_count = 1;
if (numa_node >= 0) {
param_count++;
- params[1].Type = MemExtendedParameterNumaNode;
- params[1].ULong = (unsigned)numa_node;
+ params[1].Type.Type = MiMemExtendedParameterNumaNode;
+ params[1].Arg.ULong = (unsigned)numa_node;
}
SIZE_T psize = size;
void* base = addr;
@@ -1135,13 +1152,11 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
}
// on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation
if (pVirtualAlloc2 != NULL && numa_node >= 0) {
- params[0].Type = MemExtendedParameterNumaNode;
- params[0].ULong = (unsigned)numa_node;
+ params[0].Type.Type = MiMemExtendedParameterNumaNode;
+ params[0].Arg.ULong = (unsigned)numa_node;
return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1);
}
- #else
- MI_UNUSED(numa_node);
- #endif
+
// otherwise use regular virtual alloc on older windows
return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
}
@@ -1291,11 +1306,11 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) {
Support NUMA aware allocation
-----------------------------------------------------------------------------*/
#ifdef _WIN32
-static size_t mi_os_numa_nodex() {
+static size_t mi_os_numa_nodex(void) {
USHORT numa_node = 0;
if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) {
// Extended API is supported
- PROCESSOR_NUMBER pnum;
+ MI_PROCESSOR_NUMBER pnum;
(*pGetCurrentProcessorNumberEx)(&pnum);
USHORT nnode = 0;
BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode);
diff --git a/src/random.c b/src/random.c
index 5057a623..d474a53a 100644
--- a/src/random.c
+++ b/src/random.c
@@ -168,16 +168,10 @@ If we cannot get good randomness, we fall back to weak randomness based on a tim
#if defined(_WIN32)
-#if !defined(MI_USE_RTLGENRANDOM)
-// We prefer to use BCryptGenRandom instead of RtlGenRandom but it can lead to a deadlock
-// under the VS debugger when using dynamic overriding.
-#pragma comment (lib,"bcrypt.lib")
-#include <bcrypt.h>
-static bool os_random_buf(void* buf, size_t buf_len) {
- return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
-}
-#else
-// Use (unofficial) RtlGenRandom
+#if defined(MI_USE_RTLGENRANDOM) || defined(__cplusplus)
+// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using
+// dynamic overriding, we observed it can raise an exception when compiled with C++, and
+// sometimes deadlocks when also running under the VS debugger.
#pragma comment (lib,"advapi32.lib")
#define RtlGenRandom SystemFunction036
#ifdef __cplusplus
@@ -190,6 +184,12 @@ BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength);
static bool os_random_buf(void* buf, size_t buf_len) {
return (RtlGenRandom(buf, (ULONG)buf_len) != 0);
}
+#else
+#pragma comment (lib,"bcrypt.lib")
+#include <bcrypt.h>
+static bool os_random_buf(void* buf, size_t buf_len) {
+ return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
+}
#endif
#elif defined(__APPLE__)
diff --git a/src/segment-cache.c b/src/segment-cache.c
index 93908c8f..aacdbc11 100644
--- a/src/segment-cache.c
+++ b/src/segment-cache.c
@@ -21,7 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_CACHE_FIELDS (16)
#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit
-#define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX)
+#define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX)
#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes
typedef struct mi_cache_slot_s {
diff --git a/src/segment.c b/src/segment.c
index 8d3eebe5..470632c5 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -394,69 +394,13 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
}
}
-
-// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use,
-#define MI_SEGMENT_CACHE_FRACTION (8)
-
-// note: returned segment may be partially reset
-static mi_segment_t* mi_segment_cache_pop(size_t segment_slices, mi_segments_tld_t* tld) {
- if (segment_slices != 0 && segment_slices != MI_SLICES_PER_SEGMENT) return NULL;
- mi_segment_t* segment = tld->cache;
- if (segment == NULL) return NULL;
- tld->cache_count--;
- tld->cache = segment->next;
- segment->next = NULL;
- mi_assert_internal(segment->segment_slices == MI_SLICES_PER_SEGMENT);
- _mi_stat_decrease(&tld->stats->segments_cache, 1);
- return segment;
-}
-
-static bool mi_segment_cache_full(mi_segments_tld_t* tld)
-{
- // if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread
- size_t max_cache = mi_option_get(mi_option_segment_cache);
- if (tld->cache_count < max_cache
- && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache
- ) {
- return false;
- }
- // take the opportunity to reduce the segment cache if it is too large (now)
- // TODO: this never happens as we check against peak usage, should we use current usage instead?
- while (tld->cache_count > max_cache) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) {
- mi_segment_t* segment = mi_segment_cache_pop(0,tld);
- mi_assert_internal(segment != NULL);
- if (segment != NULL) mi_segment_os_free(segment, tld);
- }
- return true;
-}
-
-static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) {
- mi_assert_internal(segment->next == NULL);
- if (segment->segment_slices != MI_SLICES_PER_SEGMENT || mi_segment_cache_full(tld)) {
- return false;
- }
- // mi_segment_delayed_decommit(segment, true, tld->stats);
- mi_assert_internal(segment->segment_slices == MI_SLICES_PER_SEGMENT);
- mi_assert_internal(segment->next == NULL);
- segment->next = tld->cache;
- tld->cache = segment;
- tld->cache_count++;
- _mi_stat_increase(&tld->stats->segments_cache,1);
- return true;
-}
-
-// called by threads that are terminating to free cached segments
+// called by threads that are terminating
void _mi_segment_thread_collect(mi_segments_tld_t* tld) {
- mi_segment_t* segment;
- while ((segment = mi_segment_cache_pop(0,tld)) != NULL) {
- mi_segment_os_free(segment, tld);
- }
- mi_assert_internal(tld->cache_count == 0);
- mi_assert_internal(tld->cache == NULL);
+ MI_UNUSED(tld);
+ // nothing to do
}
-
/* -----------------------------------------------------------
Span management
----------------------------------------------------------- */
@@ -927,7 +871,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_
size_t guard_slices = 0;
if (MI_SECURE>0) {
// in secure mode, we set up a protected page in between the segment info
- // and the page data
+ // and the page data, and at the end of the segment.
size_t os_pagesize = _mi_os_page_size();
mi_assert_internal(mi_segment_info_size(segment) - os_pagesize >= pre_size);
_mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize);
@@ -969,6 +913,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) {
+ MI_UNUSED(force);
mi_assert_internal(segment != NULL);
mi_assert_internal(segment->next == NULL);
mi_assert_internal(segment->used == 0);
@@ -992,13 +937,8 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
// stats
_mi_stat_decrease(&tld->stats->page_committed, mi_segment_info_size(segment));
- if (!force && mi_segment_cache_push(segment, tld)) {
- // it is put in our cache
- }
- else {
- // otherwise return it to the OS
- mi_segment_os_free(segment, tld);
- }
+ // return it to the OS
+ mi_segment_os_free(segment, tld);
}
@@ -1488,15 +1428,10 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_
{
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX);
- // 1. try to get a segment from our cache
- mi_segment_t* segment = mi_segment_cache_pop(MI_SEGMENT_SIZE, tld);
- if (segment != NULL) {
- mi_segment_init(segment, 0, tld, os_tld, NULL);
- return segment;
- }
- // 2. try to reclaim an abandoned segment
+
+ // 1. try to reclaim an abandoned segment
bool reclaimed;
- segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld);
+ mi_segment_t* segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld);
if (reclaimed) {
// reclaimed the right page right into the heap
mi_assert_internal(segment != NULL);
@@ -1506,7 +1441,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_
// reclaimed a segment with a large enough empty span in it
return segment;
}
- // 3. otherwise allocate a fresh segment
+ // 2. otherwise allocate a fresh segment
return mi_segment_alloc(0, tld, os_tld, NULL);
}
diff --git a/test/main-override-static.c b/test/main-override-static.c
index 393199e1..57dc7882 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -21,6 +21,7 @@ static void test_process_info(void);
static void test_reserved(void);
static void negative_stat(void);
static void alloc_huge(void);
+static void test_heap_walk(void);
int main() {
mi_version();
@@ -37,7 +38,8 @@ int main() {
// invalid_free();
// test_reserved();
// negative_stat();
- alloc_huge();
+ // alloc_huge();
+ test_heap_walk();
void* p1 = malloc(78);
void* p2 = malloc(24);
@@ -57,8 +59,10 @@ int main() {
//free(p1);
//p2 = malloc(32);
//mi_free(p2);
- mi_collect(true);
- mi_stats_print(NULL);
+
+ //mi_collect(true);
+ //mi_stats_print(NULL);
+
// test_process_info();
return 0;
}
@@ -233,6 +237,24 @@ static void alloc_huge(void) {
mi_free(p);
}
+static bool test_visit(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) {
+ if (block == NULL) {
+ printf("visiting an area with blocks of size %zu (including padding)\n", area->full_block_size);
+ }
+ else {
+ printf(" block of size %zu (allocated size is %zu)\n", block_size, mi_usable_size(block));
+ }
+ return true;
+}
+
+static void test_heap_walk(void) {
+ mi_heap_t* heap = mi_heap_new();
+ //mi_heap_malloc(heap, 2097152);
+ mi_heap_malloc(heap, 2067152);
+ mi_heap_malloc(heap, 2097160);
+ mi_heap_malloc(heap, 24576);
+ mi_heap_visit_blocks(heap, true, &test_visit, NULL);
+}
// ----------------------------
// bin size experiments
@@ -404,4 +426,4 @@ static void mi_bins(void) {
last_bsize = bsize;
}
}
-#endif
\ No newline at end of file
+#endif