add good-fit for allowing larger blocks in smaller segments

This commit is contained in:
daan 2019-08-09 11:18:38 -07:00
parent 5e56b40fe6
commit 442bad9190
7 changed files with 214 additions and 35 deletions

View file

@ -57,6 +57,7 @@ extern inline void* mi_malloc_small(size_t size) mi_attr_noexcept {
return mi_heap_malloc_small(mi_get_default_heap(), size);
}
// zero initialized small block
void* mi_zalloc_small(size_t size) mi_attr_noexcept {
void* p = mi_malloc_small(size);
@ -71,7 +72,7 @@ extern inline void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcep
void* p;
if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
p = mi_heap_malloc_small(heap, size);
}
}
else {
p = _mi_malloc_generic(heap, size);
}
@ -235,11 +236,11 @@ void mi_free(void* p) mi_attr_noexcept
// huge page stat is accounted for in `_mi_page_retire`
#endif
// adjust if it might be an un-aligned block
uintptr_t tid = _mi_thread_id();
if (mi_likely(tid == page->flags.value)) { // local, and not full or aligned
if (mi_likely(tid == page->flags.value)) {
// local, and not full or aligned
mi_block_t* block = (mi_block_t*)p;
mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
page->used--;
if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); }

View file

@ -32,24 +32,37 @@ const mi_page_t _mi_page_empty = {
// Empty page queues for every bin
#define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) }
#ifdef MI_BIN4
#define MI_PAGE_QUEUES_EMPTY \
{ QNULL(1), \
QNULL(1), QNULL(2), QNULL(3), QNULL(4), QNULL(5), QNULL(6), QNULL(7), QNULL(8), \
QNULL(10), QNULL(12), QNULL(14), QNULL(16), QNULL(20), QNULL(24), QNULL(28), QNULL(32), \
QNULL(40), QNULL(48), QNULL(56), QNULL(64), QNULL(80), QNULL(96), QNULL(112), QNULL(128), \
QNULL(160), QNULL(192), QNULL(224), QNULL(256), QNULL(320), QNULL(384), QNULL(448), QNULL(512), \
QNULL(640), QNULL(768), QNULL(896), QNULL(1024), QNULL(1280), QNULL(1536), QNULL(1792), QNULL(2048), \
QNULL(2560), QNULL(3072), QNULL(3584), QNULL(4096), QNULL(5120), QNULL(6144), QNULL(7168), QNULL(8192), \
QNULL(10240), QNULL(12288), QNULL(14336), QNULL(16384), QNULL(20480), QNULL(24576), QNULL(28672), QNULL(32768), \
QNULL(40960), QNULL(49152), QNULL(57344), QNULL(65536), QNULL(81920), QNULL(98304), QNULL(114688), \
QNULL(MI_LARGE_WSIZE_MAX + 1 /*131072, Huge queue */), \
QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \
QNULL( 11), QNULL( 15), QNULL( 23), QNULL( 31), QNULL( 47), QNULL( 63), QNULL( 95), QNULL( 127), /* 16 */ \
QNULL( 191), QNULL( 255), QNULL( 383), QNULL( 511), QNULL( 767), QNULL( 1023), QNULL( 1535), QNULL( 2047), /* 24 */ \
QNULL( 3071), QNULL( 4095), QNULL( 6143), QNULL( 8191), QNULL( 12287), QNULL( 16383), QNULL( 24575), QNULL( 32767), /* 32 */ \
QNULL( 49151), QNULL( 65535), QNULL( 98303), QNULL(131071), QNULL(196607), QNULL(262143), QNULL(393215), /* 39 */ \
QNULL(MI_LARGE_WSIZE_MAX + 1 /* 524287, Huge queue */), \
QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ }
#else
#define MI_PAGE_QUEUES_EMPTY \
{ QNULL(1), \
QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \
QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \
QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \
QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \
QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \
QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \
QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), /* 69 */ \
QNULL(MI_LARGE_WSIZE_MAX + 1 /* 393216, Huge queue */), \
QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ }
#endif
#define MI_STAT_COUNT_NULL() {0,0,0,0}
// Empty statistics
#if MI_STAT>1
#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT64(MI_STAT_COUNT_NULL) }
#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) }
#else
#define MI_STAT_COUNT_END_NULL()
#endif
@ -97,8 +110,8 @@ static mi_tld_t tld_main = {
0,
&_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
{ 0, NULL, NULL, 0, tld_main_stats }, // os
{ MI_STATS_NULL } // stats
{ 0, NULL, NULL, 0, tld_main_stats }, // os
{ MI_STATS_NULL } // stats
};
mi_heap_t _mi_heap_main = {

View file

@ -97,7 +97,7 @@ uint8_t _mi_bsr(uintptr_t x) {
// Returns MI_BIN_HUGE if the size is too large.
// We use `wsize` for the size in "machine word sizes",
// i.e. byte size == `wsize*sizeof(void*)`.
inline uint8_t _mi_bin(size_t size) {
extern inline uint8_t _mi_bin(size_t size) {
size_t wsize = _mi_wsize_from_size(size);
uint8_t bin;
if (wsize <= 1) {
@ -120,16 +120,21 @@ inline uint8_t _mi_bin(size_t size) {
bin = MI_BIN_HUGE;
}
else {
#if defined(MI_ALIGN4W)
#if defined(MI_ALIGN4W)
if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
#endif
#ifdef MI_BIN4
uint8_t b = mi_bsr32((uint32_t)wsize);
bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3;
#else
wsize--;
// find the highest bit
uint8_t b = mi_bsr32((uint32_t)wsize);
// and use the top 3 bits to determine the bin (~16% worst internal fragmentation).
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we do not round the first 8 sizes
// which each get an exact bin
bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
#endif
}
mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE);
return bin;

View file

@ -385,7 +385,7 @@ void _mi_page_retire(mi_page_t* page) {
// is the only page left with free blocks. It is not clear
// how to check this efficiently though... for now we just check
// if its neighbours are almost fully used.
if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) {
if (mi_likely(page->block_size <= MI_MEDIUM_SIZE_MAX)) {
if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) {
_mi_stat_counter_increase(&_mi_stats_main.page_no_retire,1);
return; // dont't retire after all
@ -722,10 +722,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
// call potential deferred free routines
_mi_deferred_free(heap, false);
// free delayed frees from other threads
_mi_heap_delayed_free(heap);
// huge allocation?
mi_page_t* page;
if (mi_unlikely(size > MI_LARGE_SIZE_MAX)) {

View file

@ -236,8 +236,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use,
// and no more than 2.
#define MI_SEGMENT_CACHE_MAX (2)
// and no more than 4.
#define MI_SEGMENT_CACHE_MAX (4)
#define MI_SEGMENT_CACHE_FRACTION (8)
// note: returned segment may be partially reset
@ -708,16 +708,20 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
/* -----------------------------------------------------------
Page allocation and free
----------------------------------------------------------- */
// Decide whether blocks of `bsize` bytes are a "good fit" for an area of
// `size` bytes.
//
// A fit is good when at least one block fits with room to spare
// (`bsize < size`) and the tail left over after packing in as many whole
// blocks as possible (`size % bsize`) is less than a quarter of `size`.
// Returns false when either argument is 0 or when `bsize >= size`.
//
// NOTE(review): the result is relative to the `size` passed in; confirm that
// callers pass the usable area of a page rather than a nominal page size.
static bool mi_is_good_fit(size_t bsize, size_t size) {
  if (bsize == 0 || size == 0 || bsize >= size) {
    return false;
  }
  // The wasted space is the remainder. Comparing the *used* part
  // (`size - size % bsize`) against `size/4` can never succeed here: with
  // `bsize < size` the used part is always more than half of `size`.
  const size_t wasted = size % bsize;
  return (wasted < (size / 4));
}
mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
mi_page_t* page;
if (block_size <= (MI_SMALL_PAGE_SIZE/4)) {
if (block_size <= MI_SMALL_SIZE_MAX || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) {
page = mi_segment_small_page_alloc(tld,os_tld);
}
else if (block_size <= (MI_MEDIUM_PAGE_SIZE/4)) {
else if (block_size <= MI_MEDIUM_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) {
page = mi_segment_medium_page_alloc(tld, os_tld);
}
else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) {
else if (block_size < MI_LARGE_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) {
page = mi_segment_large_page_alloc(tld, os_tld);
}
else {