mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-07-06 19:38:41 +03:00
merge from dev
This commit is contained in:
commit
92ffc25d79
65 changed files with 793 additions and 741 deletions
|
@ -11,9 +11,9 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
// --------------------------------------------------------------------------------------------
|
||||
// Atomics
|
||||
// We need to be portable between C, C++, and MSVC.
|
||||
// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode.
|
||||
// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
|
||||
// To gain better insight in the range of used atomics, we use explicitly named memory order operations
|
||||
// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode.
|
||||
// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
|
||||
// To gain better insight in the range of used atomics, we use explicitly named memory order operations
|
||||
// instead of passing the memory order as a parameter.
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
|
||||
|
@ -30,7 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
// Use MSVC C wrapper for C11 atomics
|
||||
#define _Atomic(tp) tp
|
||||
#define _Atomic(tp) tp
|
||||
#define MI_ATOMIC_VAR_INIT(x) x
|
||||
#define mi_atomic(name) mi_atomic_##name
|
||||
#define mi_memory_order(name) mi_memory_order_##name
|
||||
|
@ -275,7 +275,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
|
|||
return (intptr_t)mi_atomic_addi(p, -sub);
|
||||
}
|
||||
|
||||
// Yield
|
||||
// Yield
|
||||
#if defined(__cplusplus)
|
||||
#include <thread>
|
||||
static inline void mi_atomic_yield(void) {
|
||||
|
|
|
@ -41,10 +41,10 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
#if defined(__cplusplus)
|
||||
#define mi_decl_externc extern "C"
|
||||
#else
|
||||
#define mi_decl_externc
|
||||
#define mi_decl_externc
|
||||
#endif
|
||||
|
||||
#if !defined(_WIN32) && !defined(__wasi__)
|
||||
#if !defined(_WIN32) && !defined(__wasi__)
|
||||
#define MI_USE_PTHREADS
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
@ -360,14 +360,14 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea
|
|||
|
||||
#if defined(MI_MALLOC_OVERRIDE)
|
||||
#if defined(__APPLE__) // macOS
|
||||
#define MI_TLS_SLOT 89 // seems unused?
|
||||
// #define MI_TLS_RECURSE_GUARD 1
|
||||
#define MI_TLS_SLOT 89 // seems unused?
|
||||
// #define MI_TLS_RECURSE_GUARD 1
|
||||
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
|
||||
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
|
||||
#elif defined(__OpenBSD__)
|
||||
// use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16)
|
||||
// use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16)
|
||||
// see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
|
||||
#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
|
||||
#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
|
||||
// #elif defined(__DragonFly__)
|
||||
// #warning "mimalloc is not working correctly on DragonFly yet."
|
||||
// #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
|
||||
|
@ -407,7 +407,7 @@ static inline mi_heap_t* mi_get_default_heap(void) {
|
|||
#ifdef __GNUC__
|
||||
__asm(""); // prevent conditional load of the address of _mi_heap_empty
|
||||
#endif
|
||||
heap = (mi_heap_t*)&_mi_heap_empty;
|
||||
heap = (mi_heap_t*)&_mi_heap_empty;
|
||||
}
|
||||
return heap;
|
||||
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
|
||||
|
@ -417,7 +417,7 @@ static inline mi_heap_t* mi_get_default_heap(void) {
|
|||
mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
|
||||
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
|
||||
#else
|
||||
#if defined(MI_TLS_RECURSE_GUARD)
|
||||
#if defined(MI_TLS_RECURSE_GUARD)
|
||||
if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
|
||||
#endif
|
||||
return _mi_heap_default;
|
||||
|
@ -461,7 +461,7 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) {
|
|||
|
||||
// Segment that contains the pointer
|
||||
// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
|
||||
// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
|
||||
// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
|
||||
// therefore we align one byte before `p`.
|
||||
static inline mi_segment_t* _mi_ptr_segment(const void* p) {
|
||||
mi_assert_internal(p != NULL);
|
||||
|
@ -706,7 +706,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl
|
|||
next = (mi_block_t*)block->next;
|
||||
#endif
|
||||
mi_track_mem_noaccess(block,sizeof(mi_block_t));
|
||||
return next;
|
||||
return next;
|
||||
}
|
||||
|
||||
static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) {
|
||||
|
@ -848,7 +848,7 @@ static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
|
|||
return (uintptr_t)NtCurrentTeb();
|
||||
}
|
||||
|
||||
// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
|
||||
// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
|
||||
// both the OS and libc implementation so we use specific tests for each main platform.
|
||||
// If you test on another platform and it works please send a PR :-)
|
||||
// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
|
||||
|
@ -961,7 +961,7 @@ static inline size_t mi_ctz(uintptr_t x) {
|
|||
#endif
|
||||
}
|
||||
|
||||
#elif defined(_MSC_VER)
|
||||
#elif defined(_MSC_VER)
|
||||
|
||||
#include <limits.h> // LONG_MAX
|
||||
#define MI_HAVE_FAST_BITSCAN
|
||||
|
@ -972,7 +972,7 @@ static inline size_t mi_clz(uintptr_t x) {
|
|||
_BitScanReverse(&idx, x);
|
||||
#else
|
||||
_BitScanReverse64(&idx, x);
|
||||
#endif
|
||||
#endif
|
||||
return ((MI_INTPTR_BITS - 1) - idx);
|
||||
}
|
||||
static inline size_t mi_ctz(uintptr_t x) {
|
||||
|
@ -982,7 +982,7 @@ static inline size_t mi_ctz(uintptr_t x) {
|
|||
_BitScanForward(&idx, x);
|
||||
#else
|
||||
_BitScanForward64(&idx, x);
|
||||
#endif
|
||||
#endif
|
||||
return idx;
|
||||
}
|
||||
|
||||
|
@ -1012,7 +1012,7 @@ static inline size_t mi_clz32(uint32_t x) {
|
|||
}
|
||||
|
||||
static inline size_t mi_clz(uintptr_t x) {
|
||||
if (x==0) return MI_INTPTR_BITS;
|
||||
if (x==0) return MI_INTPTR_BITS;
|
||||
#if (MI_INTPTR_BITS <= 32)
|
||||
return mi_clz32((uint32_t)x);
|
||||
#else
|
||||
|
@ -1043,9 +1043,9 @@ static inline size_t mi_bsr(uintptr_t x) {
|
|||
// ---------------------------------------------------------------------------------
|
||||
// Provide our own `_mi_memcpy` for potential performance optimizations.
|
||||
//
|
||||
// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
|
||||
// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support
|
||||
// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)). See also issue #201 and pr #253.
|
||||
// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
|
||||
// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support
|
||||
// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)). See also issue #201 and pr #253.
|
||||
// ---------------------------------------------------------------------------------
|
||||
|
||||
#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
|
||||
|
@ -1080,7 +1080,7 @@ static inline void _mi_memzero(void* dst, size_t n) {
|
|||
|
||||
|
||||
// -------------------------------------------------------------------------------
|
||||
// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
|
||||
// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
|
||||
// This is used for example in `mi_realloc`.
|
||||
// -------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
|
||||
#if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_)
|
||||
// stay consistent with VCRT definitions
|
||||
#define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n)
|
||||
#define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n)
|
||||
#define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n)
|
||||
#else
|
||||
#define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict
|
||||
|
@ -55,7 +55,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
|
||||
void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
|
||||
void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
|
||||
|
||||
|
||||
void* operator new (std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
|
||||
void* operator new[](std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
|
||||
void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
|
||||
|
|
|
@ -22,7 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
#include <valgrind/memcheck.h>
|
||||
|
||||
#define mi_track_malloc(p,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero)
|
||||
#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
|
||||
#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
|
||||
#define mi_track_free(p) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/)
|
||||
#define mi_track_free_size(p,_size) mi_track_free(p)
|
||||
#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size)
|
||||
|
@ -49,13 +49,13 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
#define MI_TRACK_ENABLED 0
|
||||
#define MI_TRACK_TOOL "none"
|
||||
|
||||
#define mi_track_malloc(p,size,zero)
|
||||
#define mi_track_resize(p,oldsize,newsize)
|
||||
#define mi_track_free(p)
|
||||
#define mi_track_malloc(p,size,zero)
|
||||
#define mi_track_resize(p,oldsize,newsize)
|
||||
#define mi_track_free(p)
|
||||
#define mi_track_free_size(p,_size)
|
||||
#define mi_track_mem_defined(p,size)
|
||||
#define mi_track_mem_undefined(p,size)
|
||||
#define mi_track_mem_noaccess(p,size)
|
||||
#define mi_track_mem_defined(p,size)
|
||||
#define mi_track_mem_undefined(p,size)
|
||||
#define mi_track_mem_noaccess(p,size)
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable:4214) // bitfield is not int
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Minimal alignment necessary. On most platforms 16 bytes are needed
|
||||
// due to SSE registers for example. This must be at least `sizeof(void*)`
|
||||
|
@ -67,7 +67,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
// Encoded free lists allow detection of corrupted free lists
|
||||
// and can detect buffer overflows, modify after free, and double `free`s.
|
||||
#if (MI_SECURE>=3 || MI_DEBUG>=1)
|
||||
#define MI_ENCODE_FREELIST 1
|
||||
#define MI_ENCODE_FREELIST 1
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -255,39 +255,39 @@ typedef uintptr_t mi_thread_free_t;
|
|||
// We don't count `freed` (as |free|) but use `used` to reduce
|
||||
// the number of memory accesses in the `mi_page_all_free` function(s).
|
||||
//
|
||||
// Notes:
|
||||
// Notes:
|
||||
// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`)
|
||||
// - Using `uint16_t` does not seem to slow things down
|
||||
// - The size is 8 words on 64-bit which helps the page index calculations
|
||||
// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
|
||||
// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
|
||||
// and 12 are still good for address calculation)
|
||||
// - To limit the structure size, the `xblock_size` is 32-bits only; for
|
||||
// - To limit the structure size, the `xblock_size` is 32-bits only; for
|
||||
// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size
|
||||
// - `thread_free` uses the bottom bits as a delayed-free flags to optimize
|
||||
// concurrent frees where only the first concurrent free adds to the owning
|
||||
// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`).
|
||||
// The invariant is that no-delayed-free is only set if there is
|
||||
// at least one block that will be added, or has already been added, to
|
||||
// at least one block that will be added, or has already been added, to
|
||||
// the owning heap `thread_delayed_free` list. This guarantees that pages
|
||||
// will be freed correctly even if only other threads free blocks.
|
||||
typedef struct mi_page_s {
|
||||
// "owned" by the segment
|
||||
uint32_t slice_count; // slices in this page (0 if not a page)
|
||||
uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
|
||||
uint8_t is_reset : 1; // `true` if the page memory was reset
|
||||
uint8_t is_committed : 1; // `true` if the page virtual memory is committed
|
||||
uint8_t is_zero_init : 1; // `true` if the page was zero initialized
|
||||
uint8_t is_reset : 1; // `true` if the page memory was reset
|
||||
uint8_t is_committed : 1; // `true` if the page virtual memory is committed
|
||||
uint8_t is_zero_init : 1; // `true` if the page was zero initialized
|
||||
|
||||
// layout like this to optimize access in `mi_malloc` and `mi_free`
|
||||
uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
|
||||
uint16_t reserved; // number of blocks reserved in memory
|
||||
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
|
||||
uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized
|
||||
uint8_t retire_expire : 7; // expiration count for retired blocks
|
||||
uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized
|
||||
uint8_t retire_expire : 7; // expiration count for retired blocks
|
||||
|
||||
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
|
||||
uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
|
||||
uint32_t xblock_size; // size available in each block (always `>0`)
|
||||
uint32_t xblock_size; // size available in each block (always `>0`)
|
||||
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
|
||||
|
||||
#ifdef MI_ENCODE_FREELIST
|
||||
|
@ -297,8 +297,8 @@ typedef struct mi_page_s {
|
|||
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
|
||||
_Atomic(uintptr_t) xheap;
|
||||
|
||||
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
|
||||
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
|
||||
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
|
||||
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
|
||||
|
||||
// 64-bit 9 words, 32-bit 12 words, (+2 for secure)
|
||||
#if MI_INTPTR_SIZE==8
|
||||
|
|
|
@ -28,10 +28,10 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
#define mi_decl_nodiscard [[nodiscard]]
|
||||
#elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl
|
||||
#define mi_decl_nodiscard __attribute__((warn_unused_result))
|
||||
#elif defined(_HAS_NODISCARD)
|
||||
#elif defined(_HAS_NODISCARD)
|
||||
#define mi_decl_nodiscard _NODISCARD
|
||||
#elif (_MSC_VER >= 1700)
|
||||
#define mi_decl_nodiscard _Check_return_
|
||||
#define mi_decl_nodiscard _Check_return_
|
||||
#else
|
||||
#define mi_decl_nodiscard
|
||||
#endif
|
||||
|
@ -160,8 +160,8 @@ mi_decl_export void mi_thread_init(void) mi_attr_noexcept;
|
|||
mi_decl_export void mi_thread_done(void) mi_attr_noexcept;
|
||||
mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;
|
||||
|
||||
mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs,
|
||||
size_t* current_rss, size_t* peak_rss,
|
||||
mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs,
|
||||
size_t* current_rss, size_t* peak_rss,
|
||||
size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept;
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
@ -431,7 +431,7 @@ template<class T> struct _mi_stl_allocator_common {
|
|||
typedef value_type const& const_reference;
|
||||
typedef value_type* pointer;
|
||||
typedef value_type const* const_pointer;
|
||||
|
||||
|
||||
#if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
|
||||
using propagate_on_container_copy_assignment = std::true_type;
|
||||
using propagate_on_container_move_assignment = std::true_type;
|
||||
|
@ -504,7 +504,7 @@ template<class T> struct _mi_heap_stl_allocator_common : public _mi_stl_allocato
|
|||
protected:
|
||||
std::shared_ptr<mi_heap_t> heap;
|
||||
template<class U> friend struct _mi_heap_stl_allocator_common;
|
||||
|
||||
|
||||
_mi_heap_stl_allocator_common(bool destroy) {
|
||||
mi_heap_t* hp = mi_heap_new();
|
||||
this->heap.reset(hp, (destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */
|
||||
|
@ -533,7 +533,7 @@ template<class T1, class T2> bool operator==(const mi_heap_stl_allocator<T1>& x,
|
|||
template<class T1, class T2> bool operator!=(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); }
|
||||
|
||||
|
||||
// STL allocator allocation in a specific heap, where `free` does nothing and
|
||||
// STL allocator allocation in a specific heap, where `free` does nothing and
|
||||
// the heap is destroyed in one go on destruction -- use with care!
|
||||
template<class T> struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common<T> {
|
||||
using typename _mi_heap_stl_allocator_common<T>::size_type;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue