Merge branch 'dev' into dev-exp

commit d71e67b689
daan, 2019-08-11 09:00:39 -07:00
12 changed files with 157 additions and 123 deletions

View file

@@ -314,16 +314,37 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size)
   return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
 }
 
+//-----------------------------------------------------------
+// Page flags
+//-----------------------------------------------------------
 static inline uintptr_t mi_page_thread_id(const mi_page_t* page) {
-  return (page->flags.xthread_id << MI_PAGE_FLAGS_BITS);
+  return (page->flags & ~MI_PAGE_FLAGS_MASK);
 }
 
 static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) {
-  page->flags.value = 0;
-  page->flags.xthread_id = (thread_id >> MI_PAGE_FLAGS_BITS);
-  mi_assert(page->flags.value == thread_id);
+  page->flags = thread_id;
 }
 
+static inline bool mi_page_is_in_full(const mi_page_t* page) {
+  return ((page->flags & 0x01) != 0);
+}
+
+static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
+  if (in_full) page->flags |= 0x01;
+          else page->flags &= ~0x01;
+}
+
+static inline bool mi_page_has_aligned(const mi_page_t* page) {
+  return ((page->flags & 0x02) != 0);
+}
+
+static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
+  if (has_aligned) page->flags |= 0x02;
+              else page->flags &= ~0x02;
+}
+
 // -------------------------------------------------------------------
 // Encoding/Decoding the free list next pointers
 // -------------------------------------------------------------------
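The net effect of the new accessors: a page's `flags` word is the owning thread id with two status bits folded into its low bits. A minimal standalone sketch of the scheme (simplified, hypothetical names; not the mimalloc API itself):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_FLAGS_MASK ((uintptr_t)0x03)  // 0x01 = in_full, 0x02 = has_aligned

    typedef struct page_s {
      uintptr_t flags;  // thread id in the high bits, status flags in the low 2 bits
    } page_t;

    static void page_init_flags(page_t* page, uintptr_t thread_id) {
      assert((thread_id & PAGE_FLAGS_MASK) == 0);  // relies on thread ids having zero low bits
      page->flags = thread_id;
    }

    // The payoff: one comparison tests three unlikely conditions at once. If the
    // current thread id equals `flags` exactly, the free is thread-local AND the
    // page is neither full nor holds aligned blocks.
    static bool free_fast_path_ok(const page_t* page, uintptr_t tid) {
      return (tid == page->flags);
    }

    int main(void) {
      page_t p;
      page_init_flags(&p, (uintptr_t)0x1000);  // fake thread id with clear low bits
      assert(free_fast_path_ok(&p, 0x1000));
      p.flags |= 0x01;                         // mark the page full
      assert(!free_fast_path_ok(&p, 0x1000));
      return 0;
    }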

View file

@@ -94,16 +94,16 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_MEDIUM_SIZE_MAX  (MI_MEDIUM_PAGE_SIZE/4)   // 128kb on 64-bit
 #define MI_LARGE_SIZE_MAX   (MI_LARGE_PAGE_SIZE/4)    // 1Mb on 64-bit
 #define MI_LARGE_WSIZE_MAX  (MI_LARGE_SIZE_MAX>>MI_INTPTR_SHIFT)
+#define MI_HUGE_SIZE_MAX    (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE)  // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)
 
 // Minimal alignment necessary. On most platforms 16 bytes are needed
 // due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
 #define MI_MAX_ALIGN_SIZE   16   // sizeof(max_align_t)
 
 // Maximum number of size classes. (spaced exponentially in 12.5% increments)
-#define MI_BIN_HUGE  (70U)
+#define MI_BIN_HUGE  (73U)
 
-#if (MI_LARGE_WSIZE_MAX > 393216)
+#if (MI_LARGE_WSIZE_MAX >= 655360)
 #error "define more bins"
 #endif
@@ -123,25 +123,12 @@ typedef enum mi_delayed_e {
 } mi_delayed_t;
 
-// Use the lowest two bits of a thread id for the `in_full` and `has_aligned` flags
+// Use the bottom 2 bits for the `in_full` and `has_aligned` flags
+// and the rest for the threadid (we assume tid's never use those lower 2 bits).
 // This allows a single test in `mi_free` to check for unlikely cases
 // (namely, non-local free, aligned free, or freeing in a full page)
-#define MI_PAGE_FLAGS_BITS      (2)
-#define MI_PAGE_FLAGS_TID_BITS  (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS)
-typedef union mi_page_flags_u {
-  uintptr_t value;
-  struct {
-    #ifdef MI_BIG_ENDIAN
-    uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS;
-    #endif
-    uintptr_t in_full : 1;
-    uintptr_t has_aligned : 1;
-    #ifndef MI_BIG_ENDIAN
-    uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS;
-    #endif
-  };
-} mi_page_flags_t;
+#define MI_PAGE_FLAGS_MASK  ((uintptr_t)0x03)
+typedef uintptr_t mi_page_flags_t;
 
 // Thread free list.
 // We use the bottom 2 bits of the pointer for mi_delayed_t flags
@@ -339,10 +326,13 @@ typedef struct mi_stats_s {
   mi_stat_count_t commit_calls;
   mi_stat_count_t threads;
   mi_stat_count_t huge;
+  mi_stat_count_t giant;
   mi_stat_count_t malloc;
   mi_stat_count_t segments_cache;
   mi_stat_counter_t page_no_retire;
   mi_stat_counter_t searches;
+  mi_stat_counter_t huge_count;
+  mi_stat_counter_t giant_count;
 #if MI_STAT>1
   mi_stat_count_t normal[MI_BIN_HUGE+1];
 #endif
@@ -393,12 +383,8 @@ typedef struct mi_segments_tld_s {
 } mi_segments_tld_t;
 
 // OS thread local data
 typedef struct mi_os_tld_s {
-  uintptr_t mmap_next_probable;  // probable next address start allocated by mmap (to guess which path to take on alignment)
-  void* mmap_previous;           // previous address returned by mmap
-  uint8_t* pool;                 // pool of segments to reduce mmap calls on some platforms
-  size_t pool_available;         // bytes available in the pool
   mi_stats_t* stats;             // points to tld stats
 } mi_os_tld_t;
 
 // Thread local data

View file

@@ -43,7 +43,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* heap, size_t size, size_t
   if (p == NULL) return NULL;
 
   // .. and align within the allocation
-  _mi_ptr_page(p)->flags.has_aligned = true;
+  mi_page_set_has_aligned( _mi_ptr_page(p), true );
   uintptr_t adjust = alignment - (((uintptr_t)p + offset) % alignment);
   mi_assert_internal(adjust % sizeof(uintptr_t) == 0);
   void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust));
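The adjustment arithmetic is worth a worked example. A small self-contained sketch (illustrative names, not the mimalloc API):

    #include <stdint.h>
    #include <stdio.h>

    // Shift p forward so that (p + offset) is alignment-aligned, as the diff does.
    // Assumes the caller over-allocated enough to absorb the shift.
    static void* align_at(void* p, uintptr_t offset, uintptr_t alignment) {
      uintptr_t adjust = alignment - (((uintptr_t)p + offset) % alignment);
      return (adjust == alignment) ? p : (void*)((uintptr_t)p + adjust);
    }

    int main(void) {
      printf("%p\n", align_at((void*)0x1008, 0, 16));  // 8 past a boundary -> 0x1010
      printf("%p\n", align_at((void*)0x1010, 0, 16));  // already aligned: unchanged
      return 0;
    }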

View file

@@ -174,7 +174,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block
     if (mi_unlikely(mi_page_all_free(page))) {
       _mi_page_retire(page);
     }
-    else if (mi_unlikely(page->flags.in_full)) {
+    else if (mi_unlikely(mi_page_is_in_full(page))) {
       _mi_page_unfull(page);
     }
   }
@@ -194,7 +194,7 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p
 
 static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool local, void* p) {
-  mi_block_t* block = (page->flags.has_aligned ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
+  mi_block_t* block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
   _mi_free_block(page, local, block);
 }
@@ -237,7 +237,7 @@ void mi_free(void* p) mi_attr_noexcept
 #endif
 
   uintptr_t tid = _mi_thread_id();
-  if (mi_likely(tid == page->flags.value)) {
+  if (mi_likely(tid == page->flags)) { // if equal, the thread id matches and it is not a full page, nor has aligned blocks
     // local, and not full or aligned
     mi_block_t* block = (mi_block_t*)p;
     mi_block_set_next(page, block, page->local_free);
@@ -273,7 +273,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept {
   const mi_segment_t* segment = _mi_ptr_segment(p);
   const mi_page_t* page = _mi_segment_page_of(segment,p);
   size_t size = page->block_size;
-  if (mi_unlikely(page->flags.has_aligned)) {
+  if (mi_unlikely(mi_page_has_aligned(page))) {
     ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p);
     mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
     return (size - adjust);
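Concretely: for a 128-byte block whose user pointer was shifted 16 bytes by the aligned-allocation path, `_mi_page_ptr_unalign` recovers the original block start, so `mi_usable_size` reports 128 - 16 = 112 bytes.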

View file

@@ -246,7 +246,12 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
 
   // stats
   if (page->block_size > MI_LARGE_SIZE_MAX) {
-    mi_heap_stat_decrease(heap,huge,page->block_size);
+    if (page->block_size > MI_HUGE_SIZE_MAX) {
+      _mi_stat_decrease(&heap->tld->stats.giant,page->block_size);
+    }
+    else {
+      _mi_stat_decrease(&heap->tld->stats.huge, page->block_size);
+    }
   }
 #if (MI_STAT>1)
   size_t inuse = page->used - page->thread_freed;

View file

@@ -42,8 +42,8 @@ const mi_page_t _mi_page_empty = {
   QNULL(  2560), QNULL(  3072), QNULL(  3584), QNULL(  4096), QNULL(  5120), QNULL(  6144), QNULL(  7168), QNULL(  8192), /* 48 */ \
   QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
   QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
-  QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), /* 69 */ \
-  QNULL(MI_LARGE_WSIZE_MAX + 1 /* 393216, Huge queue */), \
+  QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \
+  QNULL(MI_LARGE_WSIZE_MAX + 1 /* 655360, Huge queue */), \
   QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ }
 
 #define MI_STAT_COUNT_NULL()  {0,0,0,0}
@@ -63,9 +63,8 @@ const mi_page_t _mi_page_empty = {
   MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
   MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
   MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
-  MI_STAT_COUNT_NULL(), \
-  { 0, 0 }, \
-  { 0, 0 } \
+  MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
+  { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \
   MI_STAT_COUNT_END_NULL()
 
 // --------------------------------------------------------
@@ -98,8 +97,8 @@ static mi_tld_t tld_main = {
   0,
   &_mi_heap_main,
   { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
-  { 0, NULL, NULL, 0, tld_main_stats }, // os
+  { tld_main_stats }, // os
   { MI_STATS_NULL }  // stats
 };
 
 mi_heap_t _mi_heap_main = {

View file

@@ -126,7 +126,8 @@ Commit from a region
 // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
 // if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
 // (not being able to claim is not considered an error so check for `p != NULL` afterwards).
-static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld) {
+static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld)
+{
   size_t mask = mi_region_block_mask(blocks,bitidx);
   mi_assert_internal(mask != 0);
   mi_assert_internal((mask & mi_atomic_read(&region->map)) == mask);
@@ -139,7 +140,13 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
       start = ALLOCATING;  // try to start allocating
     }
     else if (start == ALLOCATING) {
-      mi_atomic_yield(); // another thead is already allocating.. wait it out
+      // another thread is already allocating.. wait it out
+      // note: the wait here is not great (but should not happen often). Another
+      // strategy might be to just allocate another region in parallel. This tends
+      // to be bad for benchmarks though as these often start many threads at the
+      // same time leading to the allocation of too many regions. (Still, this might
+      // be the most performant and it's ok on 64-bit virtual memory with over-commit.)
+      mi_atomic_yield();
       continue;
     }
   } while( start == ALLOCATING && !mi_atomic_compare_exchange_ptr(&region->start, ALLOCATING, NULL) );
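The claim-or-wait protocol around `region->start` amounts to a small state machine on one atomic pointer. A sketch with C11 atomics and POSIX `sched_yield` (hypothetical names, not the mimalloc internals):

    #include <sched.h>
    #include <stdatomic.h>

    #define ALLOCATING ((void*)1)  // sentinel: some thread is mapping the region

    // Returns the published region start, or NULL if *we* won the race and the
    // caller must allocate the memory and publish it into `start`.
    void* claim_or_wait(_Atomic(void*)* start) {
      for (;;) {
        void* s = atomic_load(start);
        if (s == NULL) {
          void* expected = NULL;  // try to move NULL -> ALLOCATING
          if (atomic_compare_exchange_weak(start, &expected, ALLOCATING)) return NULL;
        }
        else if (s == ALLOCATING) {
          sched_yield();          // another thread is allocating: wait it out
        }
        else {
          return s;               // already allocated and published
        }
      }
    }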
@@ -183,47 +190,35 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
 // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
 // if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
 // (not being able to claim is not considered an error so check for `p != NULL` afterwards).
-static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld) {
+static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool commit, void** p, size_t* id, mi_os_tld_t* tld)
+{
   mi_assert_internal(p != NULL && id != NULL);
   mi_assert_internal(blocks < MI_REGION_MAP_BITS);
   const uintptr_t mask = mi_region_block_mask(blocks,0);
   const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;
-  size_t bitidx ;
-  uintptr_t map;
-  uintptr_t newmap;
-  do { // while no atomic claim success and not all bits seen
-    // find the first free range of bits
-    map = mi_atomic_read(&region->map);
-    size_t m = map;
-    bitidx = 0;
-    do {
-      // skip ones
-      while ((m&1) != 0) { bitidx++; m>>=1; }
-      // count zeros
-      mi_assert_internal((m&1)==0);
-      size_t zeros = 1;
-      m >>= 1;
-      while(zeros < blocks && (m&1)==0) { zeros++; m>>=1; }
-      if (zeros == blocks) break; // found a range that fits
-      bitidx += zeros;
-    }
-    while(bitidx <= bitidx_max);
-    if (bitidx > bitidx_max) {
-      return true; // no error, but could not find a range either
-    }
-    // try to claim it
-    mi_assert_internal( (mask << bitidx) >> bitidx == mask ); // no overflow?
-    mi_assert_internal( (map & (mask << bitidx)) == 0);       // fits in zero range
-    newmap = map | (mask << bitidx);
-    mi_assert_internal((newmap^map) >> bitidx == mask);
-  }
-  while(!mi_atomic_compare_exchange(&region->map, newmap, map));
-
-  // success, we claimed the blocks atomically
-  // now commit the block memory -- this can still fail
-  return mi_region_commit_blocks(region, idx, bitidx, blocks, size, commit, p, id, tld);
+
+  // scan linearly for a free range of zero bits
+  uintptr_t map = mi_atomic_read(&region->map);
+  uintptr_t m = mask;  // the mask shifted by bitidx
+  for(size_t bitidx = 0; bitidx <= bitidx_max; bitidx++, m <<= 1) {
+    if ((map & m) == 0) { // are the mask bits free at bitidx?
+      mi_assert_internal((m >> bitidx) == mask); // no overflow?
+      uintptr_t newmap = map | m;
+      mi_assert_internal((newmap^map) >> bitidx == mask);
+      if (!mi_atomic_compare_exchange(&region->map, newmap, map)) {
+        // no success, another thread claimed concurrently.. keep going
+        map = mi_atomic_read(&region->map);
+      }
+      else {
+        // success, we claimed the bits
+        // now commit the block memory -- this can still fail
+        return mi_region_commit_blocks(region, idx, bitidx, blocks, size, commit, p, id, tld);
+      }
+    }
+  }
+  // no error, but also no bits found
+  return true;
 }
 
 // Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim.
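The rewritten scan is easier to see on a plain word: slide the block mask one bit at a time and test the whole range with a single AND. A sketch of just the search step (illustrative, without the atomic claim):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    // Find `blocks` contiguous zero bits in `map`; on success store the bit index.
    static bool find_zero_range(uint64_t map, unsigned blocks, unsigned* bitidx) {
      uint64_t mask = (blocks >= 64) ? ~UINT64_C(0) : ((UINT64_C(1) << blocks) - 1);
      for (unsigned i = 0; i + blocks <= 64; i++, mask <<= 1) {
        if ((map & mask) == 0) { *bitidx = i; return true; }
      }
      return false;
    }

    int main(void) {
      unsigned idx;
      if (find_zero_range(UINT64_C(0x0F), 3, &idx)) printf("found at bit %u\n", idx); // 4
      return 0;
    }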
@@ -274,13 +269,14 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*
   size_t count = mi_atomic_read(&regions_count);
   size_t idx = mi_atomic_read(&region_next_idx);
   for (size_t visited = 0; visited < count; visited++, idx++) {
-    if (!mi_region_try_alloc_blocks(idx%count, blocks, size, commit, &p, id, tld)) return NULL; // error
+    if (idx >= count) idx = 0; // wrap around
+    if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, &p, id, tld)) return NULL; // error
     if (p != NULL) break;
   }
 
   if (p == NULL) {
-    // no free range in existing regions -- try to extend beyond the count
-    for (idx = count; idx < MI_REGION_MAX; idx++) {
+    // no free range in existing regions -- try to extend beyond the count.. but at most 4 regions
+    for (idx = count; idx < count + 4 && idx < MI_REGION_MAX; idx++) {
       if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, &p, id, tld)) return NULL; // error
       if (p != NULL) break;
     }

View file

@@ -225,27 +225,29 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
 #elif defined(__wasi__)
 static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) {
-  uintptr_t base = __builtin_wasm_memory_size(0) * os_page_size;
+  uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size();
   uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment);
-  size_t alloc_size = aligned_base - base + size;
-  mi_assert(alloc_size >= size);
+  size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size());
+  mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0);
   if (alloc_size < size) return NULL;
-  if (__builtin_wasm_memory_grow(0, alloc_size / os_page_size) == SIZE_MAX) {
+  if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) {
     errno = ENOMEM;
     return NULL;
   }
-  return (void*) aligned_base;
+  return (void*)aligned_base;
 }
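The rounding matters because `__builtin_wasm_memory_grow` is denominated in whole pages: with 64 KiB wasm pages, a small residual request would otherwise truncate to zero pages grown while the function still returned a pointer past the current end of memory.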
 #else
 static void* mi_unix_mmapx(size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
   void* p = NULL;
   #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED)
-  // on 64-bit systems, use a special area for 4MiB aligned allocations
+  // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations
   static volatile intptr_t aligned_base = ((intptr_t)1 << 42); // starting at 4TiB
-  if (try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE)==0 && (aligned_base%try_alignment)==0) {
+  if (try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE)==0) {
     intptr_t hint = mi_atomic_add(&aligned_base,size) - size;
-    p = mmap((void*)hint,size,protect_flags,flags,fd,0);
-    if (p==MAP_FAILED) p = NULL; // fall back to regular mmap
+    if (hint%try_alignment == 0) {
+      p = mmap((void*)hint,size,protect_flags,flags,fd,0);
+      if (p==MAP_FAILED) p = NULL; // fall back to regular mmap
+    }
   }
   #endif
   if (p==NULL) {
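The hint logic reserves a fresh slice of address space per mmap attempt; a compact sketch of the reservation step (hypothetical names, C11 atomics instead of mimalloc's wrappers):

    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdint.h>

    // Hand out monotonically increasing address hints starting at 4 TiB. Each
    // caller reserves a unique [hint, hint+size) slice; the diff only *uses* the
    // hint when it happens to satisfy the requested alignment. The `- size` in
    // the diff suggests mi_atomic_add returns the updated value, whereas C11
    // atomic_fetch_add returns the previous one, so no subtraction is needed here.
    static _Atomic(intptr_t) aligned_base = (intptr_t)1 << 42;

    intptr_t next_mmap_hint(size_t size) {
      return atomic_fetch_add(&aligned_base, (intptr_t)size);
    }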
@@ -273,10 +275,10 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags)
   protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD
   #endif
   #if defined(VM_MAKE_TAG)
-  // darwin: tracking anonymous page with a specific ID all up to 98 are taken officially but LLVM sanitizers had taken 99
+  // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99)
   fd = VM_MAKE_TAG(100);
   #endif
-  if (large_os_page_size > 0 && use_large_os_page(size, try_alignment)) {
+  if (use_large_os_page(size, try_alignment)) {
     int lflags = flags;
     int lfd = fd;
     #ifdef MAP_ALIGNED_SUPER
@@ -308,7 +310,7 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags)
 #endif
 
 // Primitive allocation from the OS.
-// Note: the `alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
+// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned.
 static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, mi_stats_t* stats) {
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   if (size == 0) return NULL;

View file

@@ -177,7 +177,7 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t*
 #endif
 
 static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
-  uint8_t bin = (page->flags.in_full ? MI_BIN_FULL : _mi_bin(page->block_size));
+  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size));
   mi_heap_t* heap = page->heap;
   mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL);
   mi_page_queue_t* pq = &heap->pages[bin];
@@ -187,10 +187,10 @@ static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
 }
 
 static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
-  uint8_t bin = (page->flags.in_full ? MI_BIN_FULL : _mi_bin(page->block_size));
+  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size));
   mi_assert_internal(bin <= MI_BIN_FULL);
   mi_page_queue_t* pq = &heap->pages[bin];
-  mi_assert_internal(page->flags.in_full || page->block_size == pq->block_size);
+  mi_assert_internal(mi_page_is_in_full(page) || page->block_size == pq->block_size);
   return pq;
 }
@@ -245,7 +245,7 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) {
 static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
   mi_assert_internal(page != NULL);
   mi_assert_expensive(mi_page_queue_contains(queue, page));
-  mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || (page->flags.in_full && mi_page_queue_is_full(queue)));
+  mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
   if (page->prev != NULL) page->prev->next = page->next;
   if (page->next != NULL) page->next->prev = page->prev;
   if (page == queue->last)  queue->last = page->prev;
@@ -260,7 +260,7 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
   page->next = NULL;
   page->prev = NULL;
   page->heap = NULL;
-  page->flags.in_full = false;
+  mi_page_set_in_full(page,false);
 }
@@ -269,9 +269,9 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
   mi_assert_internal(!mi_page_queue_contains(queue, page));
   mi_assert_internal(page->block_size == queue->block_size ||
                      (page->block_size > MI_LARGE_SIZE_MAX && mi_page_queue_is_huge(queue)) ||
-                     (page->flags.in_full && mi_page_queue_is_full(queue)));
+                     (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
 
-  page->flags.in_full = mi_page_queue_is_full(queue);
+  mi_page_set_in_full(page, mi_page_queue_is_full(queue));
   page->heap = heap;
   page->next = queue->first;
   page->prev = NULL;
@@ -324,7 +324,7 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
     mi_heap_queue_first_update(page->heap, to);
   }
 
-  page->flags.in_full = mi_page_queue_is_full(to);
+  mi_page_set_in_full(page, mi_page_queue_is_full(to));
 }
 
 size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append) {

View file

@@ -102,7 +102,7 @@ bool _mi_page_is_valid(mi_page_t* page) {
     mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id);
     mi_page_queue_t* pq = mi_page_queue_of(page);
     mi_assert_internal(mi_page_queue_contains(pq, page));
-    mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_SIZE_MAX || page->flags.in_full);
+    mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_SIZE_MAX || mi_page_is_in_full(page));
     mi_assert_internal(mi_heap_contains_queue(page->heap,pq));
   }
   return true;
@@ -282,26 +282,26 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
 void _mi_page_unfull(mi_page_t* page) {
   mi_assert_internal(page != NULL);
   mi_assert_expensive(_mi_page_is_valid(page));
-  mi_assert_internal(page->flags.in_full);
+  mi_assert_internal(mi_page_is_in_full(page));
 
   _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE);
-  if (!page->flags.in_full) return;
+  if (!mi_page_is_in_full(page)) return;
 
   mi_heap_t* heap = page->heap;
   mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL];
-  page->flags.in_full = false; // to get the right queue
+  mi_page_set_in_full(page, false); // to get the right queue
   mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
-  page->flags.in_full = true;
+  mi_page_set_in_full(page, true);
   mi_page_queue_enqueue_from(pq, pqfull, page);
 }
 
 static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
   mi_assert_internal(pq == mi_page_queue_of(page));
   mi_assert_internal(!mi_page_immediate_available(page));
-  mi_assert_internal(!page->flags.in_full);
+  mi_assert_internal(!mi_page_is_in_full(page));
 
   _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE);
-  if (page->flags.in_full) return;
+  if (mi_page_is_in_full(page)) return;
 
   mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page);
   mi_page_thread_free_collect(page);  // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set
@@ -349,11 +349,16 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
   mi_assert_internal(mi_tf_delayed(free) != MI_DELAYED_FREEING);
   #endif
 
-  page->flags.has_aligned = false;
+  mi_page_set_has_aligned(page, false);
 
   // account for huge pages here
   if (page->block_size > MI_LARGE_SIZE_MAX) {
-    _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size);
+    if (page->block_size > MI_HUGE_SIZE_MAX) {
+      _mi_stat_decrease(&page->heap->tld->stats.giant, page->block_size);
+    }
+    else {
+      _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size);
+    }
   }
 
   // remove from the page list
@@ -377,7 +382,7 @@ void _mi_page_retire(mi_page_t* page) {
   mi_assert_expensive(_mi_page_is_valid(page));
   mi_assert_internal(mi_page_all_free(page));
 
-  page->flags.has_aligned = false;
+  mi_page_set_has_aligned(page, false);
 
   // don't retire too often..
   // (or we end up retiring and re-allocating most of the time)
@@ -560,7 +565,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   mi_assert_internal(page->thread_freed == 0);
   mi_assert_internal(page->next == NULL);
   mi_assert_internal(page->prev == NULL);
-  mi_assert_internal(page->flags.has_aligned == false);
+  mi_assert_internal(!mi_page_has_aligned(page));
   #if MI_SECURE
   mi_assert_internal(page->cookie != 0);
   #endif
@@ -619,7 +624,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
       // 3. If the page is completely full, move it to the `mi_pages_full`
       // queue so we don't visit long-lived pages too often.
-      mi_assert_internal(!page->flags.in_full && !mi_page_immediate_available(page));
+      mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page));
       mi_page_to_full(page,pq);
 
       page = next;
@@ -702,7 +707,14 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
   if (page != NULL) {
     mi_assert_internal(mi_page_immediate_available(page));
     mi_assert_internal(page->block_size == block_size);
-    _mi_stat_increase( &heap->tld->stats.huge, block_size);
+    if (page->block_size > MI_HUGE_SIZE_MAX) {
+      _mi_stat_increase(&heap->tld->stats.giant, block_size);
+      _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1);
+    }
+    else {
+      _mi_stat_increase(&heap->tld->stats.huge, block_size);
+      _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1);
+    }
   }
   return page;
 }
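With this split, a block above `MI_LARGE_SIZE_MAX` (1 MiB on 64-bit, per the earlier comment) but at most `MI_HUGE_SIZE_MAX` is accounted as huge, and anything larger as giant; assuming 8-byte pointers and the default 4 MiB segments, `MI_HUGE_SIZE_MAX` works out to 2*8*4 MiB = 64 MiB.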

View file

@@ -106,8 +106,11 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
   mi_stat_add(&stats->malloc, &src->malloc, 1);
   mi_stat_add(&stats->segments_cache, &src->segments_cache, 1);
   mi_stat_add(&stats->huge, &src->huge, 1);
+  mi_stat_add(&stats->giant, &src->giant, 1);
 
   mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1);
   mi_stat_counter_add(&stats->searches, &src->searches, 1);
+  mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1);
+  mi_stat_counter_add(&stats->giant_count, &src->giant_count, 1);
 #if MI_STAT>1
   for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
     if (src->normal[i].allocated > 0 || src->normal[i].freed > 0) {
@@ -152,20 +155,29 @@ static void mi_print_count(int64_t n, int64_t unit, FILE* out) {
 }
 
 static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, FILE* out ) {
   _mi_fprintf(out,"%10s:", msg);
-  mi_print_amount(stat->peak, unit, out);
-  if (unit!=0) {
+  if (unit>0) {
+    mi_print_amount(stat->peak, unit, out);
     mi_print_amount(stat->allocated, unit, out);
     mi_print_amount(stat->freed, unit, out);
-  }
-  if (unit>0) {
-    mi_print_amount(unit, (unit==0 ? 0 : 1), out);
+    mi_print_amount(unit, 1, out);
     mi_print_count(stat->allocated, unit, out);
     if (stat->allocated > stat->freed)
       _mi_fprintf(out, " not all freed!\n");
     else
       _mi_fprintf(out, " ok\n");
   }
+  else if (unit<0) {
+    mi_print_amount(stat->peak, 1, out);
+    mi_print_amount(stat->allocated, 1, out);
+    mi_print_amount(stat->freed, 1, out);
+    mi_print_amount(-unit, 1, out);
+    mi_print_count((stat->allocated / -unit), 0, out);
+    if (stat->allocated > stat->freed)
+      _mi_fprintf(out, " not all freed!\n");
+    else
+      _mi_fprintf(out, " ok\n");
+  }
   else {
     _mi_fprintf(out, "\n");
   }
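The new `unit < 0` convention encodes an average block size instead of a fixed unit: callers pass `-(allocated / count)`, the amounts print in plain bytes, and `mi_print_count(stat->allocated / -unit)` recovers the number of allocations. For example, 96 MiB of huge allocations across three calls would be passed as `unit == -(32 MiB)` and print a count of 3.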
@@ -219,10 +231,12 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n
   mi_stat_count_t normal = { 0,0,0,0 };
   mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out);
   mi_stat_print(&normal, "normal", 1, out);
-  mi_stat_print(&stats->huge, "huge", 1, out);
+  mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out);
+  mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out);
 
   mi_stat_count_t total = { 0,0,0,0 };
   mi_stat_add(&total, &normal, 1);
   mi_stat_add(&total, &stats->huge, 1);
+  mi_stat_add(&total, &stats->giant, 1);
   mi_stat_print(&total, "total", 1, out);
   _mi_fprintf(out, "malloc requested: ");
   mi_print_amount(stats->malloc.allocated, 1, out);

View file

@@ -6,7 +6,6 @@
 #include <mimalloc.h>
 #include <mimalloc-override.h>  // redefines malloc etc.
 
 int main() {
-  mi_version();
   void* p1 = malloc(78);