merge from dev-exp; bitmap based arena

This commit is contained in:
Daan Leijen 2019-11-10 07:56:40 -08:00
commit fed0068dac
14 changed files with 420 additions and 268 deletions

View file

@ -10,6 +10,7 @@ option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF)
option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF)
option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF)
option(MI_SECURE_FULL "Use full security mitigations, may be more expensive (includes double-free mitigation)" OFF)
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
option(MI_BUILD_TESTS "Build test executables" ON)
@ -69,9 +70,15 @@ if(MI_OVERRIDE MATCHES "ON")
endif()
endif()
if(MI_SECURE MATCHES "ON")
message(STATUS "Set secure build (MI_SECURE=ON)")
list(APPEND mi_defines MI_SECURE=3)
if(MI_SECURE_FULL MATCHES "ON")
message(STATUS "Set full secure build (may be more expensive) (MI_SECURE_FULL=ON)")
list(APPEND mi_defines MI_SECURE=4)
set(MI_SECURE "ON")
else()
if(MI_SECURE MATCHES "ON")
message(STATUS "Set secure build (MI_SECURE=ON)")
list(APPEND mi_defines MI_SECURE=3)
endif()
endif()
if(MI_SEE_ASM MATCHES "ON")

View file

@ -123,7 +123,7 @@
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>../../include</AdditionalIncludeDirectories>
<PreprocessorDefinitions>MI_DEBUG=2;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<PreprocessorDefinitions>MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<SupportJustMyCode>false</SupportJustMyCode>
<CompileAs>Default</CompileAs>
@ -232,6 +232,9 @@
<ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\bitmap.inc.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\options.c" />

View file

@ -116,7 +116,7 @@
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>../../include</AdditionalIncludeDirectories>
<PreprocessorDefinitions>MI_DEBUG=2;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<CompileAs>CompileAsCpp</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
<LanguageStandard>stdcpp17</LanguageStandard>
@ -218,6 +218,9 @@
<ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\bitmap.inc.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\options.c" />

View file

@ -36,6 +36,13 @@ static inline void mi_atomic_add64(volatile int64_t* p, int64_t add);
// Atomically add a value; returns the previous value. Memory ordering is relaxed.
static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add);
// Atomically "and" a value; returns the previous value. Memory ordering is relaxed.
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x);
// Atomically "or" a value; returns the previous value. Memory ordering is relaxed.
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x);
// Atomically compare and exchange a value; returns `true` if successful.
// May fail spuriously. Memory ordering as release on success, and relaxed on failure.
// (Note: expected and desired are in opposite order from atomic_compare_exchange)
@ -121,22 +128,28 @@ static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exc
#include <intrin.h>
#ifdef _WIN64
typedef LONG64 msc_intptr_t;
#define RC64(f) f##64
#define MI_64(f) f##64
#else
typedef LONG msc_intptr_t;
#define RC64(f) f
#define MI_64(f) f
#endif
static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) {
return (intptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
return (intptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
}
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
  // The Interlocked intrinsics operate on the signed `msc_intptr_t`: cast on the way in and out.
  volatile msc_intptr_t* const target = (volatile msc_intptr_t*)p;
  const msc_intptr_t previous = MI_64(_InterlockedAnd)(target, (msc_intptr_t)x);
  return (uintptr_t)previous;
}
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
  // The Interlocked intrinsics operate on the signed `msc_intptr_t`: cast on the way in and out.
  volatile msc_intptr_t* const target = (volatile msc_intptr_t*)p;
  const msc_intptr_t previous = MI_64(_InterlockedOr)(target, (msc_intptr_t)x);
  return (uintptr_t)previous;
}
static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
return (expected == (uintptr_t)RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected));
return (expected == (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected));
}
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
  // This variant simply forwards to the strong compare-and-swap.
  const bool swapped = mi_atomic_cas_strong(p, desired, expected);
  return swapped;
}
static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) {
return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
}
static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) {
return *p;
@ -177,6 +190,14 @@ static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add
MI_USING_STD
return atomic_fetch_add_explicit(p, add, memory_order_relaxed);
}
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
MI_USING_STD
return atomic_fetch_and_explicit(p, x, memory_order_relaxed);
}
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
MI_USING_STD
return atomic_fetch_or_explicit(p, x, memory_order_relaxed);
}
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
MI_USING_STD
return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_release, memory_order_relaxed);

View file

@ -163,7 +163,6 @@ bool _mi_page_is_valid(mi_page_t* page);
// Overflow detecting multiply
#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5
#include <limits.h> // UINT_MAX, ULONG_MAX
@ -175,6 +174,7 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
return __builtin_umulll_overflow(count, size, total);
#endif
#else /* __builtin_umul_overflow is unavailable */
#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
*total = count * size;
return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
&& size > 0 && (SIZE_MAX / size) < count);
@ -188,6 +188,7 @@ static inline bool _mi_is_power_of_two(uintptr_t x) {
// Align upwards
static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
mi_assert_internal(alignment != 0);
uintptr_t mask = alignment - 1;
if ((alignment & mask) == 0) { // power of two?
return ((sz + mask) & ~mask);
@ -201,6 +202,12 @@ static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) {
return (sz / alignment) * alignment;
}
// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
// A `divider` of 0 is a caller error (asserted in debug builds); in that case
// `size` is returned unchanged instead of dividing by zero.
static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
  mi_assert_internal(divider != 0);
  if (divider == 0) return size;
  // `1 + (size-1)/divider` is the same value as `(size + divider - 1)/divider`,
  // but the intermediate sum cannot wrap around when `size` is close to `UINTPTR_MAX`.
  return (size == 0 ? 0 : 1 + ((size - 1) / divider));
}
// Is memory zero initialized?
static inline bool mi_mem_is_zero(void* p, size_t size) {
for (size_t i = 0; i < size; i++) {

View file

@ -26,7 +26,7 @@ terms of the MIT license. A copy of the license can be found in the file
// #define MI_SECURE 1 // guard page around metadata
// #define MI_SECURE 2 // guard page around each mimalloc page
// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free)
// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free.
// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. (cmake -DMI_SECURE_FULL=ON)
#if !defined(MI_SECURE)
#define MI_SECURE 0
@ -35,7 +35,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Define MI_DEBUG for debug mode
// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free.
// #define MI_DEBUG 2 // + internal assertion checks
// #define MI_DEBUG 3 // + extensive internal invariant checking
// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_CHECK_FULL=ON)
#if !defined(MI_DEBUG)
#if !defined(NDEBUG) || defined(_DEBUG)
#define MI_DEBUG 2

View file

@ -230,8 +230,8 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b
mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
mi_decl_export bool mi_is_redirected() mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;
// deprecated
mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;

View file

@ -7,15 +7,23 @@ terms of the MIT license. A copy of the license can be found in the file
/* ----------------------------------------------------------------------------
"Arenas" are fixed areas of OS memory from which we can allocate
large blocks (>= MI_ARENA_BLOCK_SIZE, 16MiB). Currently only used to
allocate in one arena consisting of huge OS pages -- otherwise it
delegates to direct allocation from the OS.
large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB).
In contrast to the rest of mimalloc, the arenas are shared between
threads and need to be accessed using atomic operations.
In the future, we can expose an API to manually add more arenas which
is sometimes needed for embedded devices or shared memory for example.
Currently arenas are only used for huge OS page (1GiB) reservations,
otherwise it delegates to direct allocation from the OS.
In the future, we can expose an API to manually add more kinds of arenas
which is sometimes needed for embedded devices or shared memory for example.
(We can also employ this with WASI or `sbrk` systems to reserve large arenas
on demand and be able to reuse them efficiently).
The arena allocation needs to be thread safe and we use a lock-free scan
with on-demand coalescing.
The arena allocation needs to be thread safe and we use an atomic
bitmap to allocate. The current implementation of the bitmap can
only do this within a field (`uintptr_t`) so we can allocate at most
blocks of 2GiB (64*32MiB) and no object can cross the boundary. This
can lead to fragmentation but fortunately most objects will be regions
of 256MiB in practice.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
@ -23,9 +31,10 @@ with on-demand coalescing.
#include <string.h> // memset
#include "bitmap.inc.c" // atomic bitmap
// os.c
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
//int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept;
void _mi_os_free(void* p, size_t size, mi_stats_t* stats);
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
@ -38,23 +47,27 @@ int _mi_os_numa_node_count(void);
Arena allocation
----------------------------------------------------------- */
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_ARENA_BLOCK_SIZE MI_SEGMENT_SIZE
#define MI_MAX_ARENAS (64)
// Block info: bit 0 contains the `in_use` bit, the upper bits the
// size in count of arena blocks.
typedef uintptr_t mi_block_info_t;
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE/2) // 32MiB
#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 2GiB
#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB
#define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid)
// A memory arena descriptor
typedef struct mi_arena_s {
uint8_t* start; // the start of the memory area
size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
size_t field_count; // number of bitmap fields
int numa_node; // associated NUMA node
bool is_zero_init; // is the arena zero initialized?
bool is_large; // large OS page allocated
_Atomic(uintptr_t) block_bottom; // optimization to start the search for free blocks
_Atomic(mi_block_info_t) blocks[1]; // `block_count` block info's
volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks
} mi_arena_t;
@ -71,184 +84,57 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0
// Use `0` as a special id for direct OS allocated memory.
#define MI_MEMID_OS 0
static size_t mi_memid_create(size_t arena_index, size_t block_index) {
mi_assert_internal(arena_index < 0xFF);
return ((block_index << 8) | ((arena_index+1) & 0xFF));
static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) {
mi_assert_internal(arena_index < 0xFE);
mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow?
return ((bitmap_index << 8) | ((arena_index+1) & 0xFF));
}
static void mi_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) {
static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
mi_assert_internal(memid != MI_MEMID_OS);
*arena_index = (memid & 0xFF) - 1;
*block_index = (memid >> 8);
*bitmap_index = (memid >> 8);
}
/* -----------------------------------------------------------
Block info
----------------------------------------------------------- */
static bool mi_block_is_in_use(mi_block_info_t info) {
return ((info&1) != 0);
static size_t mi_block_count_of_size(size_t size) {
return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
}
static size_t mi_block_count(mi_block_info_t info) {
return (info>>1);
}
static mi_block_info_t mi_block_info_create(size_t bcount, bool in_use) {
return (((mi_block_info_t)bcount << 1) | (in_use ? 1 : 0));
}
/* -----------------------------------------------------------
Thread safe allocation in an arena
----------------------------------------------------------- */
static void* mi_arena_allocx(mi_arena_t* arena, size_t start_idx, size_t end_idx, size_t needed_bcount, bool* is_zero, size_t* block_index)
static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
{
// Scan linearly through all block info's
// Skipping used ranges, coalescing free ranges on demand.
mi_assert_internal(needed_bcount > 0);
mi_assert_internal(start_idx <= arena->block_count);
mi_assert_internal(end_idx <= arena->block_count);
_Atomic(mi_block_info_t)* block = &arena->blocks[start_idx];
_Atomic(mi_block_info_t)* end = &arena->blocks[end_idx];
while (block < end) {
mi_block_info_t binfo = mi_atomic_read_relaxed(block);
size_t bcount = mi_block_count(binfo);
if (mi_block_is_in_use(binfo)) {
// in-use, skip ahead
mi_assert_internal(bcount > 0);
block += bcount;
}
else {
// free blocks
if (bcount==0) {
// optimization:
// use 0 initialized blocks at the end, to use single atomic operation
// initially to reduce contention (as we don't need to split)
if (block + needed_bcount > end) {
return NULL; // does not fit
}
else if (!mi_atomic_cas_weak(block, mi_block_info_create(needed_bcount, true), binfo)) {
// ouch, someone else was quicker. Try again..
continue;
}
else {
// we got it: return a pointer to the claimed memory
ptrdiff_t idx = (block - arena->blocks);
*is_zero = arena->is_zero_init;
*block_index = idx;
return (arena->start + (idx*MI_ARENA_BLOCK_SIZE));
}
}
mi_assert_internal(bcount>0);
if (needed_bcount > bcount) {
#if 0 // MI_NO_ARENA_COALESCE
block += bcount; // too small, skip to the next range
continue;
#else
// too small, try to coalesce
_Atomic(mi_block_info_t)* block_next = block + bcount;
if (block_next >= end) {
return NULL; // does not fit
}
mi_block_info_t binfo_next = mi_atomic_read(block_next);
size_t bcount_next = mi_block_count(binfo_next);
if (mi_block_is_in_use(binfo_next)) {
// next block is in use, cannot coalesce
block += (bcount + bcount_next); // skip ahea over both blocks
}
else {
// next block is free, try to coalesce
// first set the next one to being used to prevent dangling ranges
if (!mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, true), binfo_next)) {
// someone else got in before us.. try again
continue;
}
else {
if (!mi_atomic_cas_strong(block, mi_block_info_create(bcount + bcount_next, true), binfo)) { // use strong to increase success chance
// someone claimed/coalesced the block in the meantime
// first free the next block again..
bool ok = mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, false), binfo_next); // must be strong
mi_assert(ok); UNUSED(ok);
// and try again
continue;
}
else {
// coalesced! try again
// todo: we could optimize here to immediately claim the block if the
// coalesced size is a fit instead of retrying. Keep it simple for now.
continue;
}
}
}
#endif
}
else { // needed_bcount <= bcount
mi_assert_internal(needed_bcount <= bcount);
// it fits, claim the whole block
if (!mi_atomic_cas_weak(block, mi_block_info_create(bcount, true), binfo)) {
// ouch, someone else was quicker. Try again..
continue;
}
else {
// got it, now split off the needed part
if (needed_bcount < bcount) {
mi_atomic_write(block + needed_bcount, mi_block_info_create(bcount - needed_bcount, false));
mi_atomic_write(block, mi_block_info_create(needed_bcount, true));
}
// return a pointer to the claimed memory
ptrdiff_t idx = (block - arena->blocks);
*is_zero = false;
*block_index = idx;
return (arena->start + (idx*MI_ARENA_BLOCK_SIZE));
}
}
const size_t fcount = arena->field_count;
size_t idx = mi_atomic_read(&arena->search_idx); // start from last search
for (size_t visited = 0; visited < fcount; visited++, idx++) {
if (idx >= fcount) idx = 0; // wrap around
if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) {
mi_atomic_write(&arena->search_idx, idx); // start search from here next time
return true;
}
}
// no success
return NULL;
return false;
}
// Try to reduce search time by starting from bottom and wrap around.
static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_zero, size_t* block_index)
{
uintptr_t bottom = mi_atomic_read_relaxed(&arena->block_bottom);
void* p = mi_arena_allocx(arena, bottom, arena->block_count, needed_bcount, is_zero, block_index);
if (p == NULL && bottom > 0) {
// try again from the start
p = mi_arena_allocx(arena, 0, bottom, needed_bcount, is_zero, block_index);
}
if (p != NULL) {
mi_atomic_write(&arena->block_bottom, *block_index);
}
return p;
}
/* -----------------------------------------------------------
Arena Allocation
----------------------------------------------------------- */
static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
bool* commit, bool* large, bool* is_zero,
size_t* memid)
bool* commit, bool* large, bool* is_zero, size_t* memid)
{
size_t block_index = SIZE_MAX;
void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index);
if (p != NULL) {
mi_assert_internal(block_index != SIZE_MAX);
#if MI_DEBUG>=1
_Atomic(mi_block_info_t)* block = &arena->blocks[block_index];
mi_block_info_t binfo = mi_atomic_read(block);
mi_assert_internal(mi_block_is_in_use(binfo));
mi_assert_internal(mi_block_count(binfo) >= needed_bcount);
#endif
*memid = mi_memid_create(arena_index, block_index);
*commit = true; // TODO: support commit on demand?
*large = arena->is_large;
mi_bitmap_index_t bitmap_index;
if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) {
// claimed it! set the dirty bits (todo: no need for an atomic op here?)
*is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
*memid = mi_memid_create(arena_index, bitmap_index);
*commit = true; // TODO: support commit on demand?
*large = arena->is_large;
return (arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE));
}
return p;
return NULL;
}
void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
@ -257,21 +143,19 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
{
mi_assert_internal(memid != NULL && tld != NULL);
mi_assert_internal(size > 0);
*memid = MI_MEMID_OS;
*memid = MI_MEMID_OS;
*is_zero = false;
bool default_large = false;
if (large==NULL) large = &default_large; // ensure `large != NULL`
// try to allocate in an arena if the alignment is small enough
// and if there is not too much waste around the `MI_ARENA_BLOCK_SIZE`.
// and the object is not too large or too small.
if (alignment <= MI_SEGMENT_ALIGN &&
size >= 3*(MI_ARENA_BLOCK_SIZE/4) && // > 48MiB (not more than 25% waste)
!(size > MI_ARENA_BLOCK_SIZE && size < 3*(MI_ARENA_BLOCK_SIZE/2)) // ! <64MiB - 96MiB>
)
size <= MI_ARENA_MAX_OBJ_SIZE &&
size >= MI_ARENA_MIN_OBJ_SIZE)
{
size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE);
size_t bcount = asize / MI_ARENA_BLOCK_SIZE;
int numa_node = _mi_os_numa_node(tld); // current numa node
const size_t bcount = mi_block_count_of_size(size);
const int numa_node = _mi_os_numa_node(tld); // current numa node
mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
// try numa affine allocation
@ -302,7 +186,10 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
// finally, fall back to the OS
*is_zero = true;
*memid = MI_MEMID_OS;
*memid = MI_MEMID_OS;
if (*large) {
*large = mi_option_is_enabled(mi_option_large_os_pages); // try large OS pages only if enabled and allowed
}
return _mi_os_alloc_aligned(size, alignment, *commit, large, tld);
}
@ -326,8 +213,8 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) {
else {
// allocated in an arena
size_t arena_idx;
size_t block_idx;
mi_memid_indices(memid, &arena_idx, &block_idx);
size_t bitmap_idx;
mi_memid_indices(memid, &arena_idx, &bitmap_idx);
mi_assert_internal(arena_idx < MI_MAX_ARENAS);
mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx]));
mi_assert_internal(arena != NULL);
@ -335,27 +222,17 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) {
_mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
return;
}
mi_assert_internal(arena->block_count > block_idx);
if (arena->block_count <= block_idx) {
_mi_fatal_error("trying to free from non-existent block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx));
if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) {
_mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
return;
}
_Atomic(mi_block_info_t)* block = &arena->blocks[block_idx];
mi_block_info_t binfo = mi_atomic_read_relaxed(block);
mi_assert_internal(mi_block_is_in_use(binfo));
mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size);
if (!mi_block_is_in_use(binfo)) {
const size_t blocks = mi_block_count_of_size(size);
bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx);
if (!ones) {
_mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size);
return;
};
bool ok = mi_atomic_cas_strong(block, mi_block_info_create(mi_block_count(binfo), false), binfo);
mi_assert_internal(ok);
if (!ok) {
_mi_warning_message("unable to free arena block: %p, info 0x%zx", p, binfo);
}
if (block_idx < mi_atomic_read_relaxed(&arena->block_bottom)) {
mi_atomic_write(&arena->block_bottom, block_idx);
}
}
}
@ -367,7 +244,6 @@ static bool mi_arena_add(mi_arena_t* arena) {
mi_assert_internal(arena != NULL);
mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0);
mi_assert_internal(arena->block_count > 0);
mi_assert_internal(mi_mem_is_zero(arena->blocks,arena->block_count*sizeof(mi_block_info_t)));
uintptr_t i = mi_atomic_addu(&mi_arena_count,1);
if (i >= MI_MAX_ARENAS) {
@ -385,40 +261,51 @@ static bool mi_arena_add(mi_arena_t* arena) {
#include <errno.h> // ENOMEM
// Reserve `pages` huge OS pages at a specific numa node and register them as a new arena.
// A `numa_node` below -1 is treated as -1; a non-negative `numa_node` is taken modulo
// the numa node count. `timeout_msecs` is passed on to `_mi_os_alloc_huge_os_pages`.
// Returns 0 on success (and when `pages` is 0), or `ENOMEM` if either the huge page
// reservation or the allocation of the arena descriptor fails.
int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept {
int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept {
if (pages==0) return 0;
if (numa_node < -1) numa_node = -1;
if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
size_t hsize = 0;
size_t pages_reserved = 0;
void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, pages*500, &pages_reserved, &hsize);
void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize);
if (p==NULL || pages_reserved==0) {
_mi_warning_message("failed to reserve %zu gb huge pages\n", pages);
return ENOMEM;
}
_mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved);
size_t bcount = hsize / MI_ARENA_BLOCK_SIZE;
size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much
size_t bcount = mi_block_count_of_size(hsize);
size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS;
// room for two bitmaps of `fields` fields each: the in-use map followed by the dirty map
size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t));
mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
if (arena == NULL) {
_mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
return ENOMEM;
}
arena->block_count = bcount;
arena->field_count = fields;
arena->start = (uint8_t*)p;
arena->block_bottom = 0;
arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
arena->is_large = true;
arena->is_zero_init = true;
memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t));
arena->search_idx = 0;
// the dirty bitmap starts right after the `fields` fields of the in-use bitmap;
// indexing by `bcount` (a count of bits, not fields) would point outside the allocation
arena->blocks_dirty = &arena->blocks_map[fields];
// the bitmaps are already zero initialized due to os_alloc
// just claim leftover blocks if needed
size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
if (post > 0) {
// don't use leftover bits at the end
mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
mi_bitmap_claim(arena->blocks_map, fields, post, postidx, NULL);
}
// NOTE(review): the result of `mi_arena_add` is ignored here -- confirm what should happen to `arena` and `p` if it fails
mi_arena_add(arena);
return 0;
}
// reserve huge pages evenly among all numa nodes.
int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept {
int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept {
if (pages == 0) return 0;
// pages per numa node
@ -426,12 +313,13 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept {
if (numa_count <= 0) numa_count = 1;
const size_t pages_per = pages / numa_count;
const size_t pages_mod = pages % numa_count;
const size_t timeout_per = (timeout_msecs / numa_count) + 50;
// reserve evenly among numa nodes
for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
size_t node_pages = pages_per; // can be 0
if ((size_t)numa_node < pages_mod) node_pages++;
int err = mi_reserve_huge_os_pages_at(node_pages, numa_node);
int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
if (err) return err;
if (pages < node_pages) {
pages = 0;
@ -448,7 +336,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv
UNUSED(max_secs);
_mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
if (pages_reserved != NULL) *pages_reserved = 0;
int err = mi_reserve_huge_os_pages_interleave(pages);
int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0));
if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
return err;
}

208
src/bitmap.inc.c Normal file
View file

@ -0,0 +1,208 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
This file is meant to be included in other files for efficiency.
It implements a bitmap that can set/reset sequences of bits atomically
and is used to concurrently claim memory ranges.
A bitmap is an array of fields where each field is a machine word (`uintptr_t`)
A current limitation is that the bit sequences cannot cross fields
and that the sequence must be smaller or equal to the bits in a field.
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITMAP_C
#define MI_BITMAP_C
#include "mimalloc.h"
#include "mimalloc-internal.h"
/* -----------------------------------------------------------
Bitmap definition
----------------------------------------------------------- */
#define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE)
#define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set
// An atomic bitmap of `uintptr_t` fields
typedef volatile _Atomic(uintptr_t) mi_bitmap_field_t;
typedef mi_bitmap_field_t* mi_bitmap_t;
// A bitmap index is the index of the bit in a bitmap.
typedef size_t mi_bitmap_index_t;
// Build a bitmap index from a field index `idx` and a bit position `bitidx` inside that field.
static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
  mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
  const size_t field_start = idx * MI_BITMAP_FIELD_BITS;  // index of the first bit of field `idx`
  return field_start + bitidx;
}
// Which field of the bitmap holds the bit designated by `bitmap_idx`?
static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
  const size_t field_idx = bitmap_idx / MI_BITMAP_FIELD_BITS;
  return field_idx;
}
// Position of the bit designated by `bitmap_idx` relative to the start of its own field.
static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
  const size_t bit_in_field = bitmap_idx % MI_BITMAP_FIELD_BITS;
  return bit_in_field;
}
// The overall bit position in the bitmap; a bitmap index already is that position,
// so this is the identity.
static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
  const size_t bit = bitmap_idx;
  return bit;
}
// Mask with `count` consecutive one-bits whose lowest bit sits at position `bitidx`.
// `count + bitidx` may not exceed the number of bits in a field.
static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
  mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
  // a whole field is special-cased: `1 << count` cannot be computed for the full word width
  if (count == MI_BITMAP_FIELD_BITS) {
    return MI_BITMAP_FIELD_FULL;
  }
  const uintptr_t low_ones = ((uintptr_t)1 << count) - 1;  // `count` ones in the lowest bits
  return (low_ones << bitidx);
}
/* -----------------------------------------------------------
Use bit scan forward/reverse to quickly find the first zero bit if it is available
----------------------------------------------------------- */
#if defined(_MSC_VER)
#define MI_HAVE_BITSCAN
#include <intrin.h>
// Bit scan forward: position of the least significant one-bit in `x`,
// or `8*MI_INTPTR_SIZE` when `x` has no bit set at all.
static inline size_t mi_bsf(uintptr_t x) {
  DWORD pos;
  if (x != 0) {
    MI_64(_BitScanForward)(&pos, x);
    return pos;
  }
  return 8*MI_INTPTR_SIZE;
}
// Bit scan reverse: index of the highest set bit of `x`,
// or `8*MI_INTPTR_SIZE` when `x` has no bit set.
static inline size_t mi_bsr(uintptr_t x) {
  DWORD highest = 0;
  if (x != 0) {
    MI_64(_BitScanReverse)(&highest, x);
    return highest;
  }
  return 8*MI_INTPTR_SIZE;
}
#elif defined(__GNUC__) || defined(__clang__)
#include <limits.h> // LONG_MAX
// Signals that `mi_bsf`/`mi_bsr` are available; `mi_bitmap_try_claim_field`
// checks this macro to decide whether it can skip ahead with a bit scan.
#define MI_HAVE_BITSCAN
// MI_L(f) appends a suffix to the builtin name `f`: `l` when INTPTR_MAX
// equals LONG_MAX, and `ll` otherwise (e.g. `MI_L(__builtin_ctz)` becomes
// `__builtin_ctzl` or `__builtin_ctzll`).
#if (INTPTR_MAX == LONG_MAX)
# define MI_L(x) x##l
#else
# define MI_L(x) x##ll
#endif
// Bit scan forward: index of the lowest set bit of `x` (its count of
// trailing zero bits), or `8*MI_INTPTR_SIZE` when `x` is zero.
static inline size_t mi_bsf(uintptr_t x) {
  // the builtin is not called for 0; that case gets the sentinel value instead
  if (x == 0) {
    return 8*MI_INTPTR_SIZE;
  }
  return MI_L(__builtin_ctz)(x);
}
// Bit scan reverse: index of the highest set bit of `x`,
// or `8*MI_INTPTR_SIZE` when `x` is zero.
static inline size_t mi_bsr(uintptr_t x) {
  // the builtin is not called for 0; that case gets the sentinel value instead
  if (x == 0) {
    return 8*MI_INTPTR_SIZE;
  }
  // highest bit index minus the number of leading zero bits
  return (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x);
}
#endif
/* -----------------------------------------------------------
Claim a bit sequence atomically
----------------------------------------------------------- */
// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success, in which case
// `*bitmap_idx` is set to the bitmap index of the first claimed bit.
// Returns `false` when no run of `count` zero bits was found in the field;
// `*bitmap_idx` is not written in that case.
// `count` must be at most MI_BITMAP_FIELD_BITS; `bitmap_idx` must not be NULL.
static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
  mi_assert_internal(bitmap_idx != NULL);
  volatile _Atomic(uintptr_t)* field = &bitmap[idx];
  uintptr_t map = mi_atomic_read(field);
  if (map==MI_BITMAP_FIELD_FULL) return false; // short cut

  // search for 0-bit sequence of length count
  const uintptr_t mask = mi_bitmap_mask_(count, 0);
  const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;  // last position where `count` bits still fit

#ifdef MI_HAVE_BITSCAN
  size_t bitidx = mi_bsf(~map);    // quickly find the first zero bit if possible
#else
  size_t bitidx = 0;               // otherwise start at 0
#endif
  uintptr_t m = (mask << bitidx);  // invariant: m == mask shifted by bitidx

  // scan linearly for a free range of zero bits
  while (bitidx <= bitidx_max) {
    if ((map & m) == 0) {  // are the mask bits free at bitidx?
      mi_assert_internal((m >> bitidx) == mask); // no overflow?
      const uintptr_t newmap = map | m;
      mi_assert_internal((newmap^map) >> bitidx == mask);
      if (!mi_atomic_cas_weak(field, newmap, map)) {  // TODO: use strong cas here?
        // no success, another thread claimed concurrently.. keep going
        // (re-read the field and retry at the same position)
        map = mi_atomic_read(field);
        continue;
      }
      else {
        // success, we claimed the bits!
        *bitmap_idx = mi_bitmap_index_create(idx, bitidx);
        return true;
      }
    }
    else {
      // on to the next bit range
#ifdef MI_HAVE_BITSCAN
      // skip past the highest occupied bit inside the current window
      const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
      mi_assert_internal(shift > 0 && shift <= count);
#else
      const size_t shift = 1;
#endif
      bitidx += shift;
      // Stop as soon as no candidate position is left. Besides saving a
      // pointless shift, this keeps `shift` below the word width: with
      // `count == MI_BITMAP_FIELD_BITS` and the top bit of `map` set, `shift`
      // equals MI_BITMAP_FIELD_BITS and `m <<= shift` would be undefined.
      if (bitidx > bitidx_max) break;
      m <<= shift;
    }
  }
  // no bits found
  return false;
}
// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success
// (with the index of the first claimed bit stored in `*bitmap_idx`).
// The fields are tried in order, from field 0 up to `bitmap_fields - 1`.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields.
static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) {
  size_t field = 0;
  while (field < bitmap_fields) {
    const bool claimed = mi_bitmap_try_claim_field(bitmap, field, count, bitmap_idx);
    if (claimed) return true;
    field++;
  }
  // every field was tried without success
  return false;
}
// Atomically clear the `count` bits that start at `bitmap_idx`.
// Returns `true` if every one of those bits was 1 before the call.
// `bitmap_fields` is only used for an internal bounds assertion.
static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  const size_t field_idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bit_in_field = mi_bitmap_index_bit_in_field(bitmap_idx);
  const uintptr_t range = mi_bitmap_mask_(count, bit_in_field);
  mi_assert_internal(field_idx < bitmap_fields); UNUSED(bitmap_fields);
  // the bits being released are expected to be claimed
  mi_assert_internal((bitmap[field_idx] & range) == range);
  // atomic `and` yields the field value from before the bits were cleared
  const uintptr_t before = mi_atomic_and(&bitmap[field_idx], ~range);
  const uintptr_t before_in_range = (before & range);
  return (before_in_range == range);
}
// Atomically set the `count` bits that start at `bitmap_idx` to 1.
// Returns `true` if every one of those bits was 0 before the call.
// If `any_zero` is not NULL, `*any_zero` is set to `true` when at least one
// of the bits was 0 before the call, and to `false` when all were already 1.
// `bitmap_fields` is only used for an internal bounds assertion.
static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
  const size_t field_idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bit_in_field = mi_bitmap_index_bit_in_field(bitmap_idx);
  const uintptr_t range = mi_bitmap_mask_(count, bit_in_field);
  mi_assert_internal(field_idx < bitmap_fields); UNUSED(bitmap_fields);
  // no assertion on the previous state: bits that are already set are allowed
  // atomic `or` yields the field value from before the bits were set
  const uintptr_t before = mi_atomic_or(&bitmap[field_idx], range);
  const uintptr_t before_in_range = (before & range);
  if (any_zero != NULL) {
    *any_zero = (before_in_range != range);
  }
  return (before_in_range == 0);
}
// Returns `true` if all `count` bits starting at `bitmap_idx` are currently 1.
// The bitmap is only read, never modified.
// `bitmap_fields` is only used for an internal bounds assertion.
static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  const size_t field_idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bit_in_field = mi_bitmap_index_bit_in_field(bitmap_idx);
  const uintptr_t range = mi_bitmap_mask_(count, bit_in_field);
  mi_assert_internal(field_idx < bitmap_fields); UNUSED(bitmap_fields);
  const uintptr_t current = mi_atomic_read(&bitmap[field_idx]);
  return ((current & range) == range);
}
#endif

View file

@ -457,9 +457,8 @@ static void mi_process_load(void) {
}
if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
// double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB)
mi_reserve_huge_os_pages_interleave(pages);
size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
mi_reserve_huge_os_pages_interleave(pages, pages*500);
}
}

View file

@ -939,16 +939,18 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
_mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);
// check for timeout
mi_msecs_t elapsed = _mi_clock_end(start_t);
if (page >= 1) {
mi_msecs_t estimate = ((elapsed / (page+1)) * pages);
if (estimate > 2*max_msecs) { // seems like we are going to timeout, break
elapsed = max_msecs + 1;
if (max_msecs > 0) {
mi_msecs_t elapsed = _mi_clock_end(start_t);
if (page >= 1) {
mi_msecs_t estimate = ((elapsed / (page+1)) * pages);
if (estimate > 2*max_msecs) { // seems like we are going to timeout, break
elapsed = max_msecs + 1;
}
}
if (elapsed > max_msecs) {
_mi_warning_message("huge page allocation timed out\n");
break;
}
}
if (elapsed > max_msecs) {
_mi_warning_message("huge page allocation timed out\n");
break;
}
}
mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
@ -1045,9 +1047,10 @@ int _mi_os_numa_node_count(void) {
int _mi_os_numa_node(mi_os_tld_t* tld) {
UNUSED(tld);
int numa_node = mi_os_numa_nodex();
// never more than the node count and >= 0
int numa_count = _mi_os_numa_node_count();
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
// never more than the node count and >= 0
int numa_node = mi_os_numa_nodex();
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
if (numa_node < 0) numa_node = 0;
return numa_node;

View file

@ -436,15 +436,15 @@ void _mi_page_retire(mi_page_t* page) {
#define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT)
#define MI_MIN_SLICES (2)
static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) {
static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t extend, mi_stats_t* const stats) {
UNUSED(stats);
#if (MI_SECURE<=2)
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL);
#endif
mi_assert_internal(page->capacity + extend <= page->reserved);
void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL);
size_t bsize = page->block_size;
void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL);
const size_t bsize = page->block_size;
// initialize a randomized free list
// set up `slice_count` slices to alternate between
@ -452,8 +452,8 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si
while ((extend >> shift) == 0) {
shift--;
}
size_t slice_count = (size_t)1U << shift;
size_t slice_extend = extend / slice_count;
const size_t slice_count = (size_t)1U << shift;
const size_t slice_extend = extend / slice_count;
mi_assert_internal(slice_extend >= 1);
mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice
size_t counts[MI_MAX_SLICES]; // available objects in the slice
@ -467,12 +467,12 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si
// set up first element
size_t current = _mi_heap_random(heap) % slice_count;
counts[current]--;
page->free = blocks[current];
mi_block_t* const free_start = blocks[current];
// and iterate through the rest
uintptr_t rnd = heap->random;
for (size_t i = 1; i < extend; i++) {
// call random_shuffle only every INTPTR_SIZE rounds
size_t round = i%MI_INTPTR_SIZE;
const size_t round = i%MI_INTPTR_SIZE;
if (round == 0) rnd = _mi_random_shuffle(rnd);
// select a random next slice index
size_t next = ((rnd >> 8*round) & (slice_count-1));
@ -482,34 +482,39 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si
}
// and link the current block to it
counts[next]--;
mi_block_t* block = blocks[current];
mi_block_t* const block = blocks[current];
blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block
mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next`
current = next;
}
mi_block_set_next(page, blocks[current], NULL); // end of the list
// prepend to the free list (usually NULL)
mi_block_set_next(page, blocks[current], page->free); // end of the list
page->free = free_start;
heap->random = _mi_random_shuffle(rnd);
}
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats)
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats)
{
UNUSED(stats);
#if (MI_SECURE <= 2)
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL);
#endif
mi_assert_internal(page->capacity + extend <= page->reserved);
void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
size_t bsize = page->block_size;
mi_block_t* start = mi_page_block_at(page, page_area, page->capacity);
void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
const size_t bsize = page->block_size;
mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity);
// initialize a sequential free list
mi_block_t* last = mi_page_block_at(page, page_area, page->capacity + extend - 1);
mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1);
mi_block_t* block = start;
while(block <= last) {
mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
mi_block_set_next(page,block,next);
block = next;
}
mi_block_set_next(page, last, NULL);
// prepend to free list (usually `NULL`)
mi_block_set_next(page, last, page->free);
page->free = start;
}

View file

@ -130,19 +130,23 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const
char buf[32];
int len = 32;
const char* suffix = (unit <= 0 ? " " : "b");
double base = (unit == 0 ? 1000.0 : 1024.0);
const int64_t base = (unit == 0 ? 1000 : 1024);
if (unit>0) n *= unit;
double pos = (double)(n < 0 ? -n : n);
if (pos < base)
snprintf(buf,len, "%d %s ", (int)n, suffix);
else if (pos < base*base)
snprintf(buf, len, "%.1f k%s", (double)n / base, suffix);
else if (pos < base*base*base)
snprintf(buf, len, "%.1f m%s", (double)n / (base*base), suffix);
else
snprintf(buf, len, "%.1f g%s", (double)n / (base*base*base), suffix);
const int64_t pos = (n < 0 ? -n : n);
if (pos < base) {
snprintf(buf, len, "%d %s ", (int)n, suffix);
}
else {
int64_t divider = base;
const char* magnitude = "k";
if (pos >= divider*base) { divider *= base; magnitude = "m"; }
if (pos >= divider*base) { divider *= base; magnitude = "g"; }
const int64_t tens = (n / (divider/10));
const long whole = (long)(tens/10);
const long frac1 = (long)(tens%10);
snprintf(buf, len, "%ld.%ld %s%s", whole, frac1, magnitude, suffix);
}
_mi_fprintf(out, (fmt==NULL ? "%11s" : fmt), buf);
}
@ -199,8 +203,10 @@ static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg
}
static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out) {
double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count);
_mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg);
const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count));
const long avg_whole = (long)(avg_tens/10);
const long avg_frac1 = (long)(avg_tens%10);
_mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1);
}

View file

@ -18,7 +18,7 @@ terms of the MIT license.
// argument defaults
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int N = 20; // scaling factor
static int N = 40; // scaling factor
// static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int N = 100; // scaling factor
@ -70,7 +70,9 @@ static void* alloc_items(size_t items, random_t r) {
}
if (items==40) items++; // pthreads uses that size for stack increases
uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t));
for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
if (p != NULL) {
for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
}
return p;
}