diff --git a/CMakeLists.txt b/CMakeLists.txt index cacbbc8e..479c507f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF) option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF) +option(MI_SECURE_FULL "Use full security mitigations, may be more expensive (includes double-free mitigation)" OFF) option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) @@ -69,9 +70,15 @@ if(MI_OVERRIDE MATCHES "ON") endif() endif() -if(MI_SECURE MATCHES "ON") - message(STATUS "Set secure build (MI_SECURE=ON)") - list(APPEND mi_defines MI_SECURE=3) +if(MI_SECURE_FULL MATCHES "ON") + message(STATUS "Set full secure build (may be more expensive) (MI_SECURE_FULL=ON)") + list(APPEND mi_defines MI_SECURE=4) + set(MI_SECURE "ON") +else() + if(MI_SECURE MATCHES "ON") + message(STATUS "Set secure build (MI_SECURE=ON)") + list(APPEND mi_defines MI_SECURE=3) + endif() endif() if(MI_SEE_ASM MATCHES "ON") diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index ac559468..9c255ef8 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -123,7 +123,7 @@ true true ../../include - MI_DEBUG=2;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false Default @@ -232,6 +232,9 @@ + + true + @@ -249,4 +252,4 @@ - \ No newline at end of file + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 6cfd76fa..b1206cd2 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -116,7 +116,7 @@ true true ../../include - MI_DEBUG=2;%(PreprocessorDefinitions); + MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false stdcpp17 @@ -218,6 +218,9 @@ + + true + @@ -243,4 +246,4 @@ - \ No newline at end of file + diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index dff0f011..c18f990f 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -36,6 +36,13 @@ static inline void mi_atomic_add64(volatile int64_t* p, int64_t add); // Atomically add a value; returns the previous value. Memory ordering is relaxed. static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add); +// Atomically "and" a value; returns the previous value. Memory ordering is relaxed. +static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x); + +// Atomically "or" a value; returns the previous value. Memory ordering is relaxed. +static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x); + + // Atomically compare and exchange a value; returns `true` if successful. // May fail spuriously. Memory ordering as release on success, and relaxed on failure. // (Note: expected and desired are in opposite order from atomic_compare_exchange) @@ -121,22 +128,28 @@ static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exc #include #ifdef _WIN64 typedef LONG64 msc_intptr_t; -#define RC64(f) f##64 +#define MI_64(f) f##64 #else typedef LONG msc_intptr_t; -#define RC64(f) f +#define MI_64(f) f #endif static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { - return (intptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); + return (intptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); +} +static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); +} +static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { - return (expected == (uintptr_t)RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); + return (expected == (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); } static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { return mi_atomic_cas_strong(p,desired,expected); } static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { - return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); + return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) { return *p; @@ -177,6 +190,14 @@ static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add MI_USING_STD return atomic_fetch_add_explicit(p, add, memory_order_relaxed); } +static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + MI_USING_STD + return atomic_fetch_and_explicit(p, x, memory_order_relaxed); +} +static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + MI_USING_STD + return atomic_fetch_or_explicit(p, x, memory_order_relaxed); +} static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { MI_USING_STD return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_release, memory_order_relaxed); diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 2b4d53c5..10528877 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -163,7 +163,6 @@ bool _mi_page_is_valid(mi_page_t* page); // Overflow detecting multiply -#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { #if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 #include // UINT_MAX, ULONG_MAX @@ -175,6 +174,7 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { return __builtin_umulll_overflow(count, size, total); #endif #else /* __builtin_umul_overflow is unavailable */ + #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) *total = count * size; return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count); @@ -188,6 +188,7 @@ static inline bool _mi_is_power_of_two(uintptr_t x) { // Align upwards static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); uintptr_t mask = alignment - 1; if ((alignment & mask) == 0) { // power of two? return ((sz + mask) & ~mask); @@ -201,6 +202,12 @@ static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { return (sz / alignment) * alignment; } +// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. +static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { + mi_assert_internal(divider != 0); + return (divider == 0 ? size : ((size + divider - 1) / divider)); +} + // Is memory zero initialized? static inline bool mi_mem_is_zero(void* p, size_t size) { for (size_t i = 0; i < size; i++) { diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index b043bebe..cd9c5154 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -26,7 +26,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 1 // guard page around metadata // #define MI_SECURE 2 // guard page around each mimalloc page // #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free) -// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. +// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. (cmake -DMI_SECURE_FULL=ON) #if !defined(MI_SECURE) #define MI_SECURE 0 @@ -35,7 +35,7 @@ terms of the MIT license. A copy of the license can be found in the file // Define MI_DEBUG for debug mode // #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free. // #define MI_DEBUG 2 // + internal assertion checks -// #define MI_DEBUG 3 // + extensive internal invariant checking +// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_CHECK_FULL=ON) #if !defined(MI_DEBUG) #if !defined(NDEBUG) || defined(_DEBUG) #define MI_DEBUG 2 diff --git a/include/mimalloc.h b/include/mimalloc.h index 7fdcfacd..5a9ac38d 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -230,8 +230,8 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; diff --git a/src/arena.c b/src/arena.c index 5b503a39..93655033 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,15 +7,23 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 16MiB). Currently only used to -allocate in one arena consisting of huge OS pages -- otherwise it -delegates to direct allocation from the OS. +large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). +In contrast to the rest of mimalloc, the arenas are shared between +threads and need to be accessed using atomic operations. -In the future, we can expose an API to manually add more arenas which -is sometimes needed for embedded devices or shared memory for example. +Currently arenas are only used to for huge OS page (1GiB) reservations, +otherwise it delegates to direct allocation from the OS. +In the future, we can expose an API to manually add more kinds of arenas +which is sometimes needed for embedded devices or shared memory for example. +(We can also employ this with WASI or `sbrk` systems to reserve large arenas + on demand and be able to reuse them efficiently). -The arena allocation needs to be thread safe and we use a lock-free scan -with on-demand coalescing. +The arena allocation needs to be thread safe and we use an atomic +bitmap to allocate. The current implementation of the bitmap can +only do this within a field (`uintptr_t`) so we can allocate at most +blocks of 2GiB (64*32MiB) and no object can cross the boundary. This +can lead to fragmentation but fortunately most objects will be regions +of 256MiB in practice. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" @@ -23,9 +31,10 @@ with on-demand coalescing. #include // memset +#include "bitmap.inc.c" // atomic bitmap + // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -//int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); @@ -38,23 +47,27 @@ int _mi_os_numa_node_count(void); Arena allocation ----------------------------------------------------------- */ -#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_SIZE -#define MI_MAX_ARENAS (64) // Block info: bit 0 contains the `in_use` bit, the upper bits the // size in count of arena blocks. typedef uintptr_t mi_block_info_t; +#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE +#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE/2) // 32MiB +#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 2GiB +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB +#define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) // A memory arena descriptor typedef struct mi_arena_s { uint8_t* start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + size_t field_count; // number of bitmap fields int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? bool is_large; // large OS page allocated - _Atomic(uintptr_t) block_bottom; // optimization to start the search for free blocks - _Atomic(mi_block_info_t) blocks[1]; // `block_count` block info's + volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? + mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks } mi_arena_t; @@ -71,184 +84,57 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0 // Use `0` as a special id for direct OS allocated memory. #define MI_MEMID_OS 0 -static size_t mi_memid_create(size_t arena_index, size_t block_index) { - mi_assert_internal(arena_index < 0xFF); - return ((block_index << 8) | ((arena_index+1) & 0xFF)); +static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { + mi_assert_internal(arena_index < 0xFE); + mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? + return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); } -static void mi_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) { +static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { mi_assert_internal(memid != MI_MEMID_OS); *arena_index = (memid & 0xFF) - 1; - *block_index = (memid >> 8); + *bitmap_index = (memid >> 8); } -/* ----------------------------------------------------------- - Block info ------------------------------------------------------------ */ - -static bool mi_block_is_in_use(mi_block_info_t info) { - return ((info&1) != 0); +static size_t mi_block_count_of_size(size_t size) { + return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); } -static size_t mi_block_count(mi_block_info_t info) { - return (info>>1); -} - -static mi_block_info_t mi_block_info_create(size_t bcount, bool in_use) { - return (((mi_block_info_t)bcount << 1) | (in_use ? 1 : 0)); -} - - /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ - -static void* mi_arena_allocx(mi_arena_t* arena, size_t start_idx, size_t end_idx, size_t needed_bcount, bool* is_zero, size_t* block_index) +static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { - // Scan linearly through all block info's - // Skipping used ranges, coalescing free ranges on demand. - mi_assert_internal(needed_bcount > 0); - mi_assert_internal(start_idx <= arena->block_count); - mi_assert_internal(end_idx <= arena->block_count); - _Atomic(mi_block_info_t)* block = &arena->blocks[start_idx]; - _Atomic(mi_block_info_t)* end = &arena->blocks[end_idx]; - while (block < end) { - mi_block_info_t binfo = mi_atomic_read_relaxed(block); - size_t bcount = mi_block_count(binfo); - if (mi_block_is_in_use(binfo)) { - // in-use, skip ahead - mi_assert_internal(bcount > 0); - block += bcount; - } - else { - // free blocks - if (bcount==0) { - // optimization: - // use 0 initialized blocks at the end, to use single atomic operation - // initially to reduce contention (as we don't need to split) - if (block + needed_bcount > end) { - return NULL; // does not fit - } - else if (!mi_atomic_cas_weak(block, mi_block_info_create(needed_bcount, true), binfo)) { - // ouch, someone else was quicker. Try again.. - continue; - } - else { - // we got it: return a pointer to the claimed memory - ptrdiff_t idx = (block - arena->blocks); - *is_zero = arena->is_zero_init; - *block_index = idx; - return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); - } - } - - mi_assert_internal(bcount>0); - if (needed_bcount > bcount) { -#if 0 // MI_NO_ARENA_COALESCE - block += bcount; // too small, skip to the next range - continue; -#else - // too small, try to coalesce - _Atomic(mi_block_info_t)* block_next = block + bcount; - if (block_next >= end) { - return NULL; // does not fit - } - mi_block_info_t binfo_next = mi_atomic_read(block_next); - size_t bcount_next = mi_block_count(binfo_next); - if (mi_block_is_in_use(binfo_next)) { - // next block is in use, cannot coalesce - block += (bcount + bcount_next); // skip ahea over both blocks - } - else { - // next block is free, try to coalesce - // first set the next one to being used to prevent dangling ranges - if (!mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, true), binfo_next)) { - // someone else got in before us.. try again - continue; - } - else { - if (!mi_atomic_cas_strong(block, mi_block_info_create(bcount + bcount_next, true), binfo)) { // use strong to increase success chance - // someone claimed/coalesced the block in the meantime - // first free the next block again.. - bool ok = mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, false), binfo_next); // must be strong - mi_assert(ok); UNUSED(ok); - // and try again - continue; - } - else { - // coalesced! try again - // todo: we could optimize here to immediately claim the block if the - // coalesced size is a fit instead of retrying. Keep it simple for now. - continue; - } - } - } -#endif - } - else { // needed_bcount <= bcount - mi_assert_internal(needed_bcount <= bcount); - // it fits, claim the whole block - if (!mi_atomic_cas_weak(block, mi_block_info_create(bcount, true), binfo)) { - // ouch, someone else was quicker. Try again.. - continue; - } - else { - // got it, now split off the needed part - if (needed_bcount < bcount) { - mi_atomic_write(block + needed_bcount, mi_block_info_create(bcount - needed_bcount, false)); - mi_atomic_write(block, mi_block_info_create(needed_bcount, true)); - } - // return a pointer to the claimed memory - ptrdiff_t idx = (block - arena->blocks); - *is_zero = false; - *block_index = idx; - return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); - } - } + const size_t fcount = arena->field_count; + size_t idx = mi_atomic_read(&arena->search_idx); // start from last search + for (size_t visited = 0; visited < fcount; visited++, idx++) { + if (idx >= fcount) idx = 0; // wrap around + if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) { + mi_atomic_write(&arena->search_idx, idx); // start search from here next time + return true; } } - // no success - return NULL; + return false; } -// Try to reduce search time by starting from bottom and wrap around. -static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_zero, size_t* block_index) -{ - uintptr_t bottom = mi_atomic_read_relaxed(&arena->block_bottom); - void* p = mi_arena_allocx(arena, bottom, arena->block_count, needed_bcount, is_zero, block_index); - if (p == NULL && bottom > 0) { - // try again from the start - p = mi_arena_allocx(arena, 0, bottom, needed_bcount, is_zero, block_index); - } - if (p != NULL) { - mi_atomic_write(&arena->block_bottom, *block_index); - } - return p; -} /* ----------------------------------------------------------- Arena Allocation ----------------------------------------------------------- */ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, - size_t* memid) + bool* commit, bool* large, bool* is_zero, size_t* memid) { - size_t block_index = SIZE_MAX; - void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index); - if (p != NULL) { - mi_assert_internal(block_index != SIZE_MAX); - #if MI_DEBUG>=1 - _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; - mi_block_info_t binfo = mi_atomic_read(block); - mi_assert_internal(mi_block_is_in_use(binfo)); - mi_assert_internal(mi_block_count(binfo) >= needed_bcount); - #endif - *memid = mi_memid_create(arena_index, block_index); - *commit = true; // TODO: support commit on demand? - *large = arena->is_large; + mi_bitmap_index_t bitmap_index; + if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) { + // claimed it! set the dirty bits (todo: no need for an atomic op here?) + *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + *memid = mi_memid_create(arena_index, bitmap_index); + *commit = true; // TODO: support commit on demand? + *large = arena->is_large; + return (arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE)); } - return p; + return NULL; } void* _mi_arena_alloc_aligned(size_t size, size_t alignment, @@ -257,21 +143,19 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, { mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); - *memid = MI_MEMID_OS; + *memid = MI_MEMID_OS; *is_zero = false; bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` // try to allocate in an arena if the alignment is small enough - // and if there is not too much waste around the `MI_ARENA_BLOCK_SIZE`. - if (alignment <= MI_SEGMENT_ALIGN && - size >= 3*(MI_ARENA_BLOCK_SIZE/4) && // > 48MiB (not more than 25% waste) - !(size > MI_ARENA_BLOCK_SIZE && size < 3*(MI_ARENA_BLOCK_SIZE/2)) // ! <64MiB - 96MiB> - ) + // and the object is not too large or too small. + if (alignment <= MI_SEGMENT_ALIGN && + size <= MI_ARENA_MAX_OBJ_SIZE && + size >= MI_ARENA_MIN_OBJ_SIZE) { - size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); - size_t bcount = asize / MI_ARENA_BLOCK_SIZE; - int numa_node = _mi_os_numa_node(tld); // current numa node + const size_t bcount = mi_block_count_of_size(size); + const int numa_node = _mi_os_numa_node(tld); // current numa node mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine allocation @@ -302,7 +186,10 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, // finally, fall back to the OS *is_zero = true; - *memid = MI_MEMID_OS; + *memid = MI_MEMID_OS; + if (*large) { + *large = mi_option_is_enabled(mi_option_large_os_pages); // try large OS pages only if enabled and allowed + } return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); } @@ -326,8 +213,8 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { else { // allocated in an arena size_t arena_idx; - size_t block_idx; - mi_memid_indices(memid, &arena_idx, &block_idx); + size_t bitmap_idx; + mi_memid_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); mi_assert_internal(arena != NULL); @@ -335,27 +222,17 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { _mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - mi_assert_internal(arena->block_count > block_idx); - if (arena->block_count <= block_idx) { - _mi_fatal_error("trying to free from non-existent block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx)); + if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) { + _mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - _Atomic(mi_block_info_t)* block = &arena->blocks[block_idx]; - mi_block_info_t binfo = mi_atomic_read_relaxed(block); - mi_assert_internal(mi_block_is_in_use(binfo)); - mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); - if (!mi_block_is_in_use(binfo)) { + const size_t blocks = mi_block_count_of_size(size); + bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx); + if (!ones) { _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); return; }; - bool ok = mi_atomic_cas_strong(block, mi_block_info_create(mi_block_count(binfo), false), binfo); - mi_assert_internal(ok); - if (!ok) { - _mi_warning_message("unable to free arena block: %p, info 0x%zx", p, binfo); - } - if (block_idx < mi_atomic_read_relaxed(&arena->block_bottom)) { - mi_atomic_write(&arena->block_bottom, block_idx); - } } } @@ -367,8 +244,7 @@ static bool mi_arena_add(mi_arena_t* arena) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - mi_assert_internal(mi_mem_is_zero(arena->blocks,arena->block_count*sizeof(mi_block_info_t))); - + uintptr_t i = mi_atomic_addu(&mi_arena_count,1); if (i >= MI_MAX_ARENAS) { mi_atomic_subu(&mi_arena_count, 1); @@ -385,40 +261,51 @@ static bool mi_arena_add(mi_arena_t* arena) { #include // ENOMEM // reserve at a specific numa node -int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, pages*500, &pages_reserved, &hsize); + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; } _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); - size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; - size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much + size_t bcount = mi_block_count_of_size(hsize); + size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; + size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); return ENOMEM; } arena->block_count = bcount; - arena->start = (uint8_t*)p; - arena->block_bottom = 0; + arena->field_count = fields; + arena->start = (uint8_t*)p; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; - memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t)); + arena->search_idx = 0; + arena->blocks_dirty = &arena->blocks_map[bcount]; + // the bitmaps are already zero initialized due to os_alloc + // just claim leftover blocks if needed + size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + if (post > 0) { + // don't use leftover bits at the end + mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); + mi_bitmap_claim(arena->blocks_map, fields, post, postidx, NULL); + } + mi_arena_add(arena); return 0; } // reserve huge pages evenly among all numa nodes. -int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept { if (pages == 0) return 0; // pages per numa node @@ -426,12 +313,13 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; + const size_t timeout_per = (timeout_msecs / numa_count) + 50; // reserve evenly among numa nodes for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 if ((size_t)numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, numa_node); + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; @@ -448,7 +336,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages); + int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c new file mode 100644 index 00000000..3847e712 --- /dev/null +++ b/src/bitmap.inc.c @@ -0,0 +1,208 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +This file is meant to be included in other files for efficiency. +It implements a bitmap that can set/reset sequences of bits atomically +and is used to concurrently claim memory ranges. + +A bitmap is an array of fields where each field is a machine word (`uintptr_t`) + +A current limitation is that the bit sequences cannot cross fields +and that the sequence must be smaller or equal to the bits in a field. +---------------------------------------------------------------------------- */ +#pragma once +#ifndef MI_BITMAP_C +#define MI_BITMAP_C + +#include "mimalloc.h" +#include "mimalloc-internal.h" + +/* ----------------------------------------------------------- + Bitmap definition +----------------------------------------------------------- */ + +#define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE) +#define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set + +// An atomic bitmap of `uintptr_t` fields +typedef volatile _Atomic(uintptr_t) mi_bitmap_field_t; +typedef mi_bitmap_field_t* mi_bitmap_t; + +// A bitmap index is the index of the bit in a bitmap. +typedef size_t mi_bitmap_index_t; + +// Create a bit index. +static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) { + mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS); + return (idx*MI_BITMAP_FIELD_BITS) + bitidx; +} + +// Get the field index from a bit index. +static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { + return (bitmap_idx / MI_BITMAP_FIELD_BITS); +} + +// Get the bit index in a bitmap field +static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) { + return (bitmap_idx % MI_BITMAP_FIELD_BITS); +} + +// Get the full bit index +static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { + return bitmap_idx; +} + + +// The bit mask for a given number of blocks at a specified bit index. +static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { + mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); + if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; + return ((((uintptr_t)1 << count) - 1) << bitidx); +} + + +/* ----------------------------------------------------------- + Use bit scan forward/reverse to quickly find the first zero bit if it is available +----------------------------------------------------------- */ +#if defined(_MSC_VER) +#define MI_HAVE_BITSCAN +#include +static inline size_t mi_bsf(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanForward)(&idx, x); + return idx; +} +static inline size_t mi_bsr(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanReverse)(&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +#include // LONG_MAX +#define MI_HAVE_BITSCAN +#if (INTPTR_MAX == LONG_MAX) +# define MI_L(x) x##l +#else +# define MI_L(x) x##ll +#endif +static inline size_t mi_bsf(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x)); +} +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x)); +} +#endif + +/* ----------------------------------------------------------- + Claim a bit sequence atomically +----------------------------------------------------------- */ + +// Try to atomically claim a sequence of `count` bits in a single +// field at `idx` in `bitmap`. Returns `true` on success. +static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +{ + mi_assert_internal(bitmap_idx != NULL); + volatile _Atomic(uintptr_t)* field = &bitmap[idx]; + uintptr_t map = mi_atomic_read(field); + if (map==MI_BITMAP_FIELD_FULL) return false; // short cut + + // search for 0-bit sequence of length count + const uintptr_t mask = mi_bitmap_mask_(count, 0); + const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count; + +#ifdef MI_HAVE_BITSCAN + size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible +#else + size_t bitidx = 0; // otherwise start at 0 +#endif + uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx + + // scan linearly for a free range of zero bits + while (bitidx <= bitidx_max) { + if ((map & m) == 0) { // are the mask bits free at bitidx? + mi_assert_internal((m >> bitidx) == mask); // no overflow? + const uintptr_t newmap = map | m; + mi_assert_internal((newmap^map) >> bitidx == mask); + if (!mi_atomic_cas_weak(field, newmap, map)) { // TODO: use strong cas here? + // no success, another thread claimed concurrently.. keep going + map = mi_atomic_read(field); + continue; + } + else { + // success, we claimed the bits! + *bitmap_idx = mi_bitmap_index_create(idx, bitidx); + return true; + } + } + else { + // on to the next bit range +#ifdef MI_HAVE_BITSCAN + const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); + mi_assert_internal(shift > 0 && shift <= count); +#else + const size_t shift = 1; +#endif + bitidx += shift; + m <<= shift; + } + } + // no bits found + return false; +} + + +// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. +static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { + for (size_t idx = 0; idx < bitmap_fields; idx++) { + if (mi_bitmap_try_claim_field(bitmap, idx, count, bitmap_idx)) { + return true; + } + } + return false; +} + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously +static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + mi_assert_internal((bitmap[idx] & mask) == mask); + uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask); + return ((prev & mask) == mask); +} + + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously +static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + // mi_assert_internal((bitmap[idx] & mask) == 0); + uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); + if (any_zero != NULL) *any_zero = ((prev & mask) != mask); + return ((prev & mask) == 0); +} + +// Returns `true` if all `count` bits were 1 +static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + // mi_assert_internal((bitmap[idx] & mask) == 0); + return ((mi_atomic_read(&bitmap[idx]) & mask) == mask); +} + +#endif \ No newline at end of file diff --git a/src/init.c b/src/init.c index 2f4c300b..f9b4f85c 100644 --- a/src/init.c +++ b/src/init.c @@ -457,9 +457,8 @@ static void mi_process_load(void) { } if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - // double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) - mi_reserve_huge_os_pages_interleave(pages); + size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); + mi_reserve_huge_os_pages_interleave(pages, pages*500); } } diff --git a/src/os.c b/src/os.c index 36288a7c..aea95b3d 100644 --- a/src/os.c +++ b/src/os.c @@ -939,16 +939,18 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); // check for timeout - mi_msecs_t elapsed = _mi_clock_end(start_t); - if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page+1)) * pages); - if (estimate > 2*max_msecs) { // seems like we are going to timeout, break - elapsed = max_msecs + 1; + if (max_msecs > 0) { + mi_msecs_t elapsed = _mi_clock_end(start_t); + if (page >= 1) { + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + elapsed = max_msecs + 1; + } + } + if (elapsed > max_msecs) { + _mi_warning_message("huge page allocation timed out\n"); + break; } - } - if (elapsed > max_msecs) { - _mi_warning_message("huge page allocation timed out\n"); - break; } } mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); @@ -1045,9 +1047,10 @@ int _mi_os_numa_node_count(void) { int _mi_os_numa_node(mi_os_tld_t* tld) { UNUSED(tld); - int numa_node = mi_os_numa_nodex(); - // never more than the node count and >= 0 int numa_count = _mi_os_numa_node_count(); + if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 + // never more than the node count and >= 0 + int numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } if (numa_node < 0) numa_node = 0; return numa_node; diff --git a/src/page.c b/src/page.c index dcb5133b..986a76d7 100644 --- a/src/page.c +++ b/src/page.c @@ -436,15 +436,15 @@ void _mi_page_retire(mi_page_t* page) { #define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) #define MI_MIN_SLICES (2) -static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) { +static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); #if (MI_SECURE<=2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); - void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL); - size_t bsize = page->block_size; + void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL); + const size_t bsize = page->block_size; // initialize a randomized free list // set up `slice_count` slices to alternate between @@ -452,8 +452,8 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si while ((extend >> shift) == 0) { shift--; } - size_t slice_count = (size_t)1U << shift; - size_t slice_extend = extend / slice_count; + const size_t slice_count = (size_t)1U << shift; + const size_t slice_extend = extend / slice_count; mi_assert_internal(slice_extend >= 1); mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice size_t counts[MI_MAX_SLICES]; // available objects in the slice @@ -467,12 +467,12 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si // set up first element size_t current = _mi_heap_random(heap) % slice_count; counts[current]--; - page->free = blocks[current]; + mi_block_t* const free_start = blocks[current]; // and iterate through the rest uintptr_t rnd = heap->random; for (size_t i = 1; i < extend; i++) { // call random_shuffle only every INTPTR_SIZE rounds - size_t round = i%MI_INTPTR_SIZE; + const size_t round = i%MI_INTPTR_SIZE; if (round == 0) rnd = _mi_random_shuffle(rnd); // select a random next slice index size_t next = ((rnd >> 8*round) & (slice_count-1)); @@ -482,34 +482,39 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si } // and link the current block to it counts[next]--; - mi_block_t* block = blocks[current]; + mi_block_t* const block = blocks[current]; blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next` current = next; } - mi_block_set_next(page, blocks[current], NULL); // end of the list + // prepend to the free list (usually NULL) + mi_block_set_next(page, blocks[current], page->free); // end of the list + page->free = free_start; heap->random = _mi_random_shuffle(rnd); } -static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats) +static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); + #if (MI_SECURE <= 2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); + #endif mi_assert_internal(page->capacity + extend <= page->reserved); - void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); - size_t bsize = page->block_size; - mi_block_t* start = mi_page_block_at(page, page_area, page->capacity); + void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); + const size_t bsize = page->block_size; + mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity); // initialize a sequential free list - mi_block_t* last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); mi_block_t* block = start; while(block <= last) { mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); mi_block_set_next(page,block,next); block = next; } - mi_block_set_next(page, last, NULL); + // prepend to free list (usually `NULL`) + mi_block_set_next(page, last, page->free); page->free = start; } diff --git a/src/stats.c b/src/stats.c index 2b106acc..5b0d2787 100644 --- a/src/stats.c +++ b/src/stats.c @@ -130,19 +130,23 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const char buf[32]; int len = 32; const char* suffix = (unit <= 0 ? " " : "b"); - double base = (unit == 0 ? 1000.0 : 1024.0); + const int64_t base = (unit == 0 ? 1000 : 1024); if (unit>0) n *= unit; - double pos = (double)(n < 0 ? -n : n); - if (pos < base) - snprintf(buf,len, "%d %s ", (int)n, suffix); - else if (pos < base*base) - snprintf(buf, len, "%.1f k%s", (double)n / base, suffix); - else if (pos < base*base*base) - snprintf(buf, len, "%.1f m%s", (double)n / (base*base), suffix); - else - snprintf(buf, len, "%.1f g%s", (double)n / (base*base*base), suffix); - + const int64_t pos = (n < 0 ? -n : n); + if (pos < base) { + snprintf(buf, len, "%d %s ", (int)n, suffix); + } + else { + int64_t divider = base; + const char* magnitude = "k"; + if (pos >= divider*base) { divider *= base; magnitude = "m"; } + if (pos >= divider*base) { divider *= base; magnitude = "g"; } + const int64_t tens = (n / (divider/10)); + const long whole = (long)(tens/10); + const long frac1 = (long)(tens%10); + snprintf(buf, len, "%ld.%ld %s%s", whole, frac1, magnitude, suffix); + } _mi_fprintf(out, (fmt==NULL ? "%11s" : fmt), buf); } @@ -199,8 +203,10 @@ static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg } static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out) { - double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count); - _mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg); + const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count)); + const long avg_whole = (long)(avg_tens/10); + const long avg_frac1 = (long)(avg_tens%10); + _mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); } diff --git a/test/test-stress.c b/test/test-stress.c index 08406ec7..07f4a2aa 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -18,7 +18,7 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 20; // scaling factor +static int N = 40; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor @@ -70,7 +70,9 @@ static void* alloc_items(size_t items, random_t r) { } if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); - for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + if (p != NULL) { + for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + } return p; }