add good-fit for allowing larger blocks in smaller segments
parent 5e56b40fe6
commit 442bad9190
7 changed files with 214 additions and 35 deletions
@@ -57,6 +57,7 @@ extern inline void* mi_malloc_small(size_t size) mi_attr_noexcept {
   return mi_heap_malloc_small(mi_get_default_heap(), size);
 }

 // zero initialized small block
 void* mi_zalloc_small(size_t size) mi_attr_noexcept {
   void* p = mi_malloc_small(size);
@@ -71,7 +72,7 @@ extern inline void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept
   void* p;
   if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
     p = mi_heap_malloc_small(heap, size);
   }
   else {
     p = _mi_malloc_generic(heap, size);
   }
@@ -235,11 +236,11 @@ void mi_free(void* p) mi_attr_noexcept
   // huge page stat is accounted for in `_mi_page_retire`
 #endif

   // adjust if it might be an un-aligned block
   uintptr_t tid = _mi_thread_id();
-  if (mi_likely(tid == page->flags.value)) { // local, and not full or aligned
+  if (mi_likely(tid == page->flags.value)) {
+    // local, and not full or aligned
     mi_block_t* block = (mi_block_t*)p;
-    mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance
+    mi_block_set_next(page, block, page->local_free);
     page->local_free = block;
     page->used--;
     if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); }
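For context, the fast path this hunk reformats is a plain thread-local list push: the freed block becomes the new head of `page->local_free`, the `used` counter drops, and a fully free page is retired. The sketch below mimics that pattern with stand-in types; `page_t`, `block_t` and `local_free` are illustrative names, not mimalloc's API.

#include <stddef.h>
#include <stdio.h>

// Stand-ins for mimalloc's internal types; the real mi_page_t/mi_block_t live
// in the internal headers and carry much more state than shown here.
typedef struct block_s { struct block_s* next; } block_t;
typedef struct page_s {
  block_t* local_free;   // blocks freed by the owning thread
  size_t   used;         // blocks still handed out from this page
} page_t;

// Thread-local free: push the block onto the local free list and decrement the
// use count; no atomics are needed because only the owning thread touches it.
static void local_free(page_t* page, void* p) {
  block_t* block = (block_t*)p;
  block->next = page->local_free;      // cf. mi_block_set_next(...) above
  page->local_free = block;
  page->used--;
  if (page->used == 0) {
    // every block is free again; the real code retires the page here
    // via _mi_page_retire(page)
  }
}

int main(void) {
  block_t b1, b2;
  page_t page = { NULL, 2 };           // pretend two blocks were allocated
  local_free(&page, &b1);
  local_free(&page, &b2);
  printf("used=%zu, head=%p\n", page.used, (void*)page.local_free);
  return 0;
}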
src/init.c (37 changed lines)
@@ -32,24 +32,37 @@ const mi_page_t _mi_page_empty = {
 // Empty page queues for every bin
 #define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) }
+#ifdef MI_BIN4
 #define MI_PAGE_QUEUES_EMPTY \
   { QNULL(1), \
-    QNULL(1), QNULL(2), QNULL(3), QNULL(4), QNULL(5), QNULL(6), QNULL(7), QNULL(8), \
-    QNULL(10), QNULL(12), QNULL(14), QNULL(16), QNULL(20), QNULL(24), QNULL(28), QNULL(32), \
-    QNULL(40), QNULL(48), QNULL(56), QNULL(64), QNULL(80), QNULL(96), QNULL(112), QNULL(128), \
-    QNULL(160), QNULL(192), QNULL(224), QNULL(256), QNULL(320), QNULL(384), QNULL(448), QNULL(512), \
-    QNULL(640), QNULL(768), QNULL(896), QNULL(1024), QNULL(1280), QNULL(1536), QNULL(1792), QNULL(2048), \
-    QNULL(2560), QNULL(3072), QNULL(3584), QNULL(4096), QNULL(5120), QNULL(6144), QNULL(7168), QNULL(8192), \
-    QNULL(10240), QNULL(12288), QNULL(14336), QNULL(16384), QNULL(20480), QNULL(24576), QNULL(28672), QNULL(32768), \
-    QNULL(40960), QNULL(49152), QNULL(57344), QNULL(65536), QNULL(81920), QNULL(98304), QNULL(114688), \
-    QNULL(MI_LARGE_WSIZE_MAX + 1 /*131072, Huge queue */), \
+    QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \
+    QNULL( 11), QNULL( 15), QNULL( 23), QNULL( 31), QNULL( 47), QNULL( 63), QNULL( 95), QNULL( 127), /* 16 */ \
+    QNULL( 191), QNULL( 255), QNULL( 383), QNULL( 511), QNULL( 767), QNULL( 1023), QNULL( 1535), QNULL( 2047), /* 24 */ \
+    QNULL( 3071), QNULL( 4095), QNULL( 6143), QNULL( 8191), QNULL( 12287), QNULL( 16383), QNULL( 24575), QNULL( 32767), /* 32 */ \
+    QNULL( 49151), QNULL( 65535), QNULL( 98303), QNULL(131071), QNULL(196607), QNULL(262143), QNULL(393215), /* 39 */ \
+    QNULL(MI_LARGE_WSIZE_MAX + 1 /* 524287, Huge queue */), \
     QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ }
+#else
+#define MI_PAGE_QUEUES_EMPTY \
+  { QNULL(1), \
+    QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \
+    QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \
+    QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \
+    QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \
+    QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \
+    QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \
+    QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
+    QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
+    QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), /* 69 */ \
+    QNULL(MI_LARGE_WSIZE_MAX + 1 /* 393216, Huge queue */), \
+    QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ }
+#endif

 #define MI_STAT_COUNT_NULL() {0,0,0,0}

 // Empty statistics
 #if MI_STAT>1
-#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT64(MI_STAT_COUNT_NULL) }
+#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) }
 #else
 #define MI_STAT_COUNT_END_NULL()
 #endif
@@ -97,8 +110,8 @@ static mi_tld_t tld_main = {
   0,
   &_mi_heap_main,
   { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
   { 0, NULL, NULL, 0, tld_main_stats }, // os
   { MI_STATS_NULL } // stats
 };

 mi_heap_t _mi_heap_main = {
@@ -97,7 +97,7 @@ uint8_t _mi_bsr(uintptr_t x) {
 // Returns MI_BIN_HUGE if the size is too large.
 // We use `wsize` for the size in "machine word sizes",
 // i.e. byte size == `wsize*sizeof(void*)`.
-inline uint8_t _mi_bin(size_t size) {
+extern inline uint8_t _mi_bin(size_t size) {
   size_t wsize = _mi_wsize_from_size(size);
   uint8_t bin;
   if (wsize <= 1) {
@@ -120,16 +120,21 @@ inline uint8_t _mi_bin(size_t size) {
     bin = MI_BIN_HUGE;
   }
   else {
   #if defined(MI_ALIGN4W)
     if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
   #endif
+  #ifdef MI_BIN4
+    uint8_t b = mi_bsr32((uint32_t)wsize);
+    bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3;
+  #else
     wsize--;
     // find the highest bit
     uint8_t b = mi_bsr32((uint32_t)wsize);
-    // and use the top 3 bits to determine the bin (~16% worst internal fragmentation).
+    // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
     // - adjust with 3 because we use do not round the first 8 sizes
     //   which each get an exact bin
     bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
+  #endif
   }
   mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE);
   return bin;
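To see what the default (#else) branch computes: the first eight word sizes each get an exact bin, and above that the bin index comes from the top three bits of wsize-1, i.e. four sub-bins per power of two. The standalone sketch below reproduces that mapping; bin_of_wsize and bsr32 are illustrative names, not mimalloc's API.

#include <stdint.h>
#include <stdio.h>

// Illustrative stand-in for mi_bsr32: index of the highest set bit (x > 0).
static uint8_t bsr32(uint32_t x) {
  uint8_t b = 0;
  while (x >>= 1) b++;
  return b;
}

// Sketch of the default (#else) binning: sizes 1..8 words get exact bins,
// larger sizes use the top 3 bits, giving 4 sub-bins per power of two.
static uint8_t bin_of_wsize(size_t wsize) {
  if (wsize <= 1) return 1;
  if (wsize <= 8) return (uint8_t)wsize;   // exact bins for the first 8 sizes
  wsize--;
  uint8_t b = bsr32((uint32_t)wsize);      // highest set bit
  return ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
}

int main(void) {
  // Worked example: wsize 100 -> wsize-1 = 99 (binary 1100011), b = 6,
  // bin = (6<<2) + ((99>>4)&3) - 3 = 24 + 2 - 3 = 23, the 112-word size class.
  printf("wsize 100 -> bin %u\n", (unsigned)bin_of_wsize(100));
  for (size_t w = 1; w <= 128; w *= 2) {
    printf("wsize %3zu -> bin %u\n", w, (unsigned)bin_of_wsize(w));
  }
  return 0;
}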
@@ -385,7 +385,7 @@ void _mi_page_retire(mi_page_t* page) {
   // is the only page left with free blocks. It is not clear
   // how to check this efficiently though... for now we just check
   // if its neighbours are almost fully used.
-  if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) {
+  if (mi_likely(page->block_size <= MI_MEDIUM_SIZE_MAX)) {
     if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) {
       _mi_stat_counter_increase(&_mi_stats_main.page_no_retire,1);
       return; // dont't retire after all
@@ -722,10 +722,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
   // call potential deferred free routines
   _mi_deferred_free(heap, false);

   // free delayed frees from other threads
   _mi_heap_delayed_free(heap);

   // huge allocation?
   mi_page_t* page;
   if (mi_unlikely(size > MI_LARGE_SIZE_MAX)) {
@@ -236,8 +236,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld)

 // The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use,
-// and no more than 2.
-#define MI_SEGMENT_CACHE_MAX (2)
+// and no more than 4.
+#define MI_SEGMENT_CACHE_MAX (4)
 #define MI_SEGMENT_CACHE_FRACTION (8)

 // note: returned segment may be partially reset
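The comment above states the policy: keep at most 1/MI_SEGMENT_CACHE_FRACTION of the peak segment usage in the thread-local cache, and never more than MI_SEGMENT_CACHE_MAX (now 4). A small sketch of that arithmetic, assuming the limit is applied to a generic "peak" count; the hunk does not show how the real code applies it.

#include <stddef.h>
#include <stdio.h>

// Constants as in the hunk above; the helper below is illustrative only.
#define SEGMENT_CACHE_MAX      (4)
#define SEGMENT_CACHE_FRACTION (8)

// Cache limit: 1/8 of the peak, but never more than the fixed cap.
static size_t segment_cache_limit(size_t peak) {
  size_t limit = peak / SEGMENT_CACHE_FRACTION;
  return (limit > SEGMENT_CACHE_MAX) ? SEGMENT_CACHE_MAX : limit;
}

int main(void) {
  printf("peak 16 -> cache limit %zu\n", segment_cache_limit(16));  // 16/8 = 2
  printf("peak 64 -> cache limit %zu\n", segment_cache_limit(64));  // 8, capped to 4
  return 0;
}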
@@ -708,16 +708,20 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld)
 /* -----------------------------------------------------------
    Page allocation and free
 ----------------------------------------------------------- */
+static bool mi_is_good_fit(size_t bsize, size_t size) {
+  // good fit if no more than 25% wasted
+  return (bsize > 0 && size > 0 && bsize < size && (size - (size % bsize)) < (size/4));
+}

 mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
   mi_page_t* page;
-  if (block_size <= (MI_SMALL_PAGE_SIZE/4)) {
+  if (block_size <= MI_SMALL_SIZE_MAX || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) {
     page = mi_segment_small_page_alloc(tld,os_tld);
   }
-  else if (block_size <= (MI_MEDIUM_PAGE_SIZE/4)) {
+  else if (block_size <= MI_MEDIUM_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) {
     page = mi_segment_medium_page_alloc(tld, os_tld);
   }
-  else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) {
+  else if (block_size < MI_LARGE_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) {
     page = mi_segment_large_page_alloc(tld, os_tld);
   }
   else {
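The new mi_is_good_fit check is what lets a block larger than the small or medium size limit still land in the smaller page kind when little of the page would be wasted, per the "25% wasted" comment. The sketch below computes that waste directly for an assumed 64 KiB small page (MI_SMALL_PAGE_SIZE is defined in types.h, not shown in this hunk): a 20 KiB block was previously pushed to a medium page by the old block_size <= MI_SMALL_PAGE_SIZE/4 cut-off, yet three of them fill a small page with only about 6% left over.

#include <stddef.h>
#include <stdio.h>

// The "wasted" tail the comment in mi_is_good_fit talks about: the part of a
// page that cannot hold another whole block. Standalone illustration, not
// mimalloc code.
static double waste_fraction(size_t bsize, size_t page_size) {
  if (bsize == 0 || bsize >= page_size) return 1.0;
  return (double)(page_size % bsize) / (double)page_size;
}

int main(void) {
  const size_t small_page = 64 * 1024;   // assumed value of MI_SMALL_PAGE_SIZE
  const size_t bsize = 20 * 1024;        // larger than small_page/4 = 16 KiB

  printf("blocks per small page: %zu\n", small_page / bsize);                          // 3
  printf("wasted: %.1f%% of the page\n", 100.0 * waste_fraction(bsize, small_page));   // ~6.3%
  return 0;
}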