mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-05-04 22:49:32 +03:00
add good-fit for allowing larger blocks in smaller segments
This commit is contained in:
parent
5e56b40fe6
commit
442bad9190
7 changed files with 214 additions and 35 deletions
|
@ -91,21 +91,31 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
#define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)
|
||||
#define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE)
|
||||
|
||||
#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/8) // 64kb on 64-bit
|
||||
#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/8) // 512kb on 64-bit
|
||||
#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb on 64-bit
|
||||
#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/4) // 1mb on 64-bit
|
||||
#define MI_LARGE_WSIZE_MAX (MI_LARGE_SIZE_MAX>>MI_INTPTR_SHIFT)
|
||||
|
||||
|
||||
// Maximum number of size classes. (spaced exponentially in 16.7% increments)
|
||||
#define MI_BIN_HUGE (64U)
|
||||
|
||||
// Minimal alignment necessary. On most platforms 16 bytes are needed
|
||||
// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
|
||||
#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
|
||||
|
||||
#if (MI_LARGE_WSIZE_MAX > 131072)
|
||||
#define MI_BIN4
|
||||
#ifdef MI_BIN4
|
||||
// Maximum number of size classes. (spaced exponentially in 25% increments)
|
||||
#define MI_BIN_HUGE (40U)
|
||||
|
||||
#if (MI_LARGE_WSIZE_MAX > 524287)
|
||||
#error "define more bins"
|
||||
#endif
|
||||
#else
|
||||
// Maximum number of size classes. (spaced exponentially in 12.5% increments)
|
||||
#define MI_BIN_HUGE (70U)
|
||||
|
||||
#if (MI_LARGE_WSIZE_MAX > 393216)
|
||||
#error "define more bins"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef uintptr_t mi_encoded_t;
|
||||
|
||||
|
@ -172,10 +182,10 @@ typedef struct mi_page_s {
|
|||
bool is_reset:1; // `true` if the page memory was reset
|
||||
bool is_committed:1; // `true` if the page virtual memory is committed
|
||||
|
||||
// layout like this to optimize access in `mi_malloc` and `mi_free`
|
||||
// layout like this to optimize access in `mi_malloc` and `mi_free`
|
||||
uint16_t capacity; // number of blocks committed
|
||||
uint16_t reserved; // number of blocks reserved in memory
|
||||
// 16 bits padding
|
||||
// 16 bits padding
|
||||
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
|
||||
#if MI_SECURE
|
||||
uintptr_t cookie; // random cookie to encode the free lists
|
||||
|
|
|
@ -57,6 +57,7 @@ extern inline void* mi_malloc_small(size_t size) mi_attr_noexcept {
|
|||
return mi_heap_malloc_small(mi_get_default_heap(), size);
|
||||
}
|
||||
|
||||
|
||||
// zero initialized small block
|
||||
void* mi_zalloc_small(size_t size) mi_attr_noexcept {
|
||||
void* p = mi_malloc_small(size);
|
||||
|
@ -71,7 +72,7 @@ extern inline void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcep
|
|||
void* p;
|
||||
if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
|
||||
p = mi_heap_malloc_small(heap, size);
|
||||
}
|
||||
}
|
||||
else {
|
||||
p = _mi_malloc_generic(heap, size);
|
||||
}
|
||||
|
@ -235,11 +236,11 @@ void mi_free(void* p) mi_attr_noexcept
|
|||
// huge page stat is accounted for in `_mi_page_retire`
|
||||
#endif
|
||||
|
||||
// adjust if it might be an un-aligned block
|
||||
uintptr_t tid = _mi_thread_id();
|
||||
if (mi_likely(tid == page->flags.value)) { // local, and not full or aligned
|
||||
if (mi_likely(tid == page->flags.value)) {
|
||||
// local, and not full or aligned
|
||||
mi_block_t* block = (mi_block_t*)p;
|
||||
mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance
|
||||
mi_block_set_next(page, block, page->local_free);
|
||||
page->local_free = block;
|
||||
page->used--;
|
||||
if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); }
|
||||
|
|
37
src/init.c
37
src/init.c
|
@ -32,24 +32,37 @@ const mi_page_t _mi_page_empty = {
|
|||
|
||||
// Empty page queues for every bin
|
||||
#define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) }
|
||||
#ifdef MI_BIN4
|
||||
#define MI_PAGE_QUEUES_EMPTY \
|
||||
{ QNULL(1), \
|
||||
QNULL(1), QNULL(2), QNULL(3), QNULL(4), QNULL(5), QNULL(6), QNULL(7), QNULL(8), \
|
||||
QNULL(10), QNULL(12), QNULL(14), QNULL(16), QNULL(20), QNULL(24), QNULL(28), QNULL(32), \
|
||||
QNULL(40), QNULL(48), QNULL(56), QNULL(64), QNULL(80), QNULL(96), QNULL(112), QNULL(128), \
|
||||
QNULL(160), QNULL(192), QNULL(224), QNULL(256), QNULL(320), QNULL(384), QNULL(448), QNULL(512), \
|
||||
QNULL(640), QNULL(768), QNULL(896), QNULL(1024), QNULL(1280), QNULL(1536), QNULL(1792), QNULL(2048), \
|
||||
QNULL(2560), QNULL(3072), QNULL(3584), QNULL(4096), QNULL(5120), QNULL(6144), QNULL(7168), QNULL(8192), \
|
||||
QNULL(10240), QNULL(12288), QNULL(14336), QNULL(16384), QNULL(20480), QNULL(24576), QNULL(28672), QNULL(32768), \
|
||||
QNULL(40960), QNULL(49152), QNULL(57344), QNULL(65536), QNULL(81920), QNULL(98304), QNULL(114688), \
|
||||
QNULL(MI_LARGE_WSIZE_MAX + 1 /*131072, Huge queue */), \
|
||||
QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \
|
||||
QNULL( 11), QNULL( 15), QNULL( 23), QNULL( 31), QNULL( 47), QNULL( 63), QNULL( 95), QNULL( 127), /* 16 */ \
|
||||
QNULL( 191), QNULL( 255), QNULL( 383), QNULL( 511), QNULL( 767), QNULL( 1023), QNULL( 1535), QNULL( 2047), /* 24 */ \
|
||||
QNULL( 3071), QNULL( 4095), QNULL( 6143), QNULL( 8191), QNULL( 12287), QNULL( 16383), QNULL( 24575), QNULL( 32767), /* 32 */ \
|
||||
QNULL( 49151), QNULL( 65535), QNULL( 98303), QNULL(131071), QNULL(196607), QNULL(262143), QNULL(393215), /* 39 */ \
|
||||
QNULL(MI_LARGE_WSIZE_MAX + 1 /* 524287, Huge queue */), \
|
||||
QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ }
|
||||
#else
|
||||
#define MI_PAGE_QUEUES_EMPTY \
|
||||
{ QNULL(1), \
|
||||
QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \
|
||||
QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \
|
||||
QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \
|
||||
QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \
|
||||
QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \
|
||||
QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \
|
||||
QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
|
||||
QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
|
||||
QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), /* 69 */ \
|
||||
QNULL(MI_LARGE_WSIZE_MAX + 1 /* 393216, Huge queue */), \
|
||||
QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ }
|
||||
#endif
|
||||
|
||||
#define MI_STAT_COUNT_NULL() {0,0,0,0}
|
||||
|
||||
// Empty statistics
|
||||
#if MI_STAT>1
|
||||
#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT64(MI_STAT_COUNT_NULL) }
|
||||
#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) }
|
||||
#else
|
||||
#define MI_STAT_COUNT_END_NULL()
|
||||
#endif
|
||||
|
@ -97,8 +110,8 @@ static mi_tld_t tld_main = {
|
|||
0,
|
||||
&_mi_heap_main,
|
||||
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
|
||||
{ 0, NULL, NULL, 0, tld_main_stats }, // os
|
||||
{ MI_STATS_NULL } // stats
|
||||
{ 0, NULL, NULL, 0, tld_main_stats }, // os
|
||||
{ MI_STATS_NULL } // stats
|
||||
};
|
||||
|
||||
mi_heap_t _mi_heap_main = {
|
||||
|
|
|
@ -97,7 +97,7 @@ uint8_t _mi_bsr(uintptr_t x) {
|
|||
// Returns MI_BIN_HUGE if the size is too large.
|
||||
// We use `wsize` for the size in "machine word sizes",
|
||||
// i.e. byte size == `wsize*sizeof(void*)`.
|
||||
inline uint8_t _mi_bin(size_t size) {
|
||||
extern inline uint8_t _mi_bin(size_t size) {
|
||||
size_t wsize = _mi_wsize_from_size(size);
|
||||
uint8_t bin;
|
||||
if (wsize <= 1) {
|
||||
|
@ -120,16 +120,21 @@ inline uint8_t _mi_bin(size_t size) {
|
|||
bin = MI_BIN_HUGE;
|
||||
}
|
||||
else {
|
||||
#if defined(MI_ALIGN4W)
|
||||
#if defined(MI_ALIGN4W)
|
||||
if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
|
||||
#endif
|
||||
#ifdef MI_BIN4
|
||||
uint8_t b = mi_bsr32((uint32_t)wsize);
|
||||
bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3;
|
||||
#else
|
||||
wsize--;
|
||||
// find the highest bit
|
||||
uint8_t b = mi_bsr32((uint32_t)wsize);
|
||||
// and use the top 3 bits to determine the bin (~16% worst internal fragmentation).
|
||||
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
|
||||
// - adjust with 3 because we use do not round the first 8 sizes
|
||||
// which each get an exact bin
|
||||
bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
|
||||
#endif
|
||||
}
|
||||
mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE);
|
||||
return bin;
|
||||
|
|
|
@ -385,7 +385,7 @@ void _mi_page_retire(mi_page_t* page) {
|
|||
// is the only page left with free blocks. It is not clear
|
||||
// how to check this efficiently though... for now we just check
|
||||
// if its neighbours are almost fully used.
|
||||
if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) {
|
||||
if (mi_likely(page->block_size <= MI_MEDIUM_SIZE_MAX)) {
|
||||
if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) {
|
||||
_mi_stat_counter_increase(&_mi_stats_main.page_no_retire,1);
|
||||
return; // dont't retire after all
|
||||
|
@ -722,10 +722,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
|
|||
|
||||
// call potential deferred free routines
|
||||
_mi_deferred_free(heap, false);
|
||||
|
||||
|
||||
// free delayed frees from other threads
|
||||
_mi_heap_delayed_free(heap);
|
||||
|
||||
|
||||
// huge allocation?
|
||||
mi_page_t* page;
|
||||
if (mi_unlikely(size > MI_LARGE_SIZE_MAX)) {
|
||||
|
|
|
@ -236,8 +236,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
|
|||
|
||||
|
||||
// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use,
|
||||
// and no more than 2.
|
||||
#define MI_SEGMENT_CACHE_MAX (2)
|
||||
// and no more than 4.
|
||||
#define MI_SEGMENT_CACHE_MAX (4)
|
||||
#define MI_SEGMENT_CACHE_FRACTION (8)
|
||||
|
||||
// note: returned segment may be partially reset
|
||||
|
@ -708,16 +708,20 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
|
|||
/* -----------------------------------------------------------
|
||||
Page allocation and free
|
||||
----------------------------------------------------------- */
|
||||
// Decide whether blocks of `bsize` bytes pack well into an area of `size` bytes.
//
// The area holds `size / bsize` whole blocks; the bytes left over after the
// last whole block (`size % bsize`) are wasted. The fit is "good" when that
// waste is no more than 25% of the area.
//
// Returns false when either argument is zero or when a single block does not
// fit strictly inside the area (`bsize >= size`).
//
// NOTE(review): the previous test compared the *used* part
// (`size - size % bsize`) against `size/4`, which can never hold when
// `bsize < size`, so the function always returned false. Callers that pass a
// raw page size should confirm the usable page area really is `size` bytes.
static bool mi_is_good_fit(size_t bsize, size_t size) {
  if (bsize == 0 || size == 0 || bsize >= size) {
    return false;
  }
  const size_t wasted = size % bsize;
  return (wasted <= (size / 4));
}
|
||||
|
||||
mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
|
||||
mi_page_t* page;
|
||||
if (block_size <= (MI_SMALL_PAGE_SIZE/4)) {
|
||||
if (block_size <= MI_SMALL_SIZE_MAX || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) {
|
||||
page = mi_segment_small_page_alloc(tld,os_tld);
|
||||
}
|
||||
else if (block_size <= (MI_MEDIUM_PAGE_SIZE/4)) {
|
||||
else if (block_size <= MI_MEDIUM_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) {
|
||||
page = mi_segment_medium_page_alloc(tld, os_tld);
|
||||
}
|
||||
else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) {
|
||||
else if (block_size < MI_LARGE_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) {
|
||||
page = mi_segment_large_page_alloc(tld, os_tld);
|
||||
}
|
||||
else {
|
||||
|
|
|
@ -6,8 +6,154 @@
|
|||
#include <mimalloc.h>
|
||||
#include <mimalloc-override.h> // redefines malloc etc.
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#define MI_INTPTR_SIZE 8
|
||||
#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE)
|
||||
|
||||
#define MI_BIN_HUGE 100
|
||||
//#define MI_ALIGN2W
|
||||
|
||||
// Bit scan reverse: return the index of the highest bit.
|
||||
static inline uint8_t mi_bsr32(uint32_t x);
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <windows.h>
|
||||
#include <intrin.h>
|
||||
// MSVC variant: index (0..31) of the highest set bit of `x`.
// `_BitScanReverse` returns 0 and leaves the index unspecified when no bit is
// set, so a zero argument is reported as 0 (matching `_mi_bsr`).
// The index is declared as `unsigned long`, the type the intrinsic writes
// through, instead of casting the address of a `uint32_t`.
static inline uint8_t mi_bsr32(uint32_t x) {
  unsigned long idx = 0;
  if (!_BitScanReverse(&idx, x)) {
    return 0;
  }
  return (uint8_t)idx;
}
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
// GCC/clang variant: index (0..31) of the highest set bit of `x`.
// `__builtin_clz` has an undefined result for 0, so a zero argument is
// handled up front and reported as 0 (matching `_mi_bsr`).
static inline uint8_t mi_bsr32(uint32_t x) {
  if (x == 0) {
    return 0;
  }
  return (uint8_t)(31 - __builtin_clz(x));
}
|
||||
#else
|
||||
// Portable variant: index (0..31) of the highest set bit of `x`.
// Uses de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
// A zero argument is reported as 0 (matching `_mi_bsr`); without the guard the
// table lookup below would yield 31 for zero.
static inline uint8_t mi_bsr32(uint32_t x) {
  static const uint8_t debruijn[32] = {
    31,  0, 22,  1, 28, 23, 18,  2, 29, 26, 24, 10, 19,  7,  3, 12,
    30, 21, 27, 17, 25,  9,  6, 11, 20, 16,  8,  5, 15,  4, 14, 13,
  };
  if (x == 0) {
    return 0;
  }
  // smear the highest set bit into every lower position ...
  x |= x >> 1;
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;
  // ... then step to the next power of two (wraps to 0 when bit 31 is set)
  x++;
  // the top 5 bits of the 32-bit product select the table entry
  return debruijn[(uint32_t)(x * UINT32_C(0x076be629)) >> 27];
}
|
||||
#endif
|
||||
|
||||
// Bit scan reverse on a pointer-sized value: index of the highest set bit.
// A zero argument yields 0. On 64-bit builds the upper and lower 32-bit
// halves are scanned separately with `mi_bsr32`.
uint8_t _mi_bsr(uintptr_t x) {
  if (x == 0) { return 0; }
#if MI_INTPTR_SIZE==8
  const uint32_t upper = (uint32_t)(x >> 32);
  if (upper != 0) {
    // highest bit lives in the upper half: offset its index by 32
    return (uint8_t)(32 + mi_bsr32(upper));
  }
  return mi_bsr32((uint32_t)x);
#elif MI_INTPTR_SIZE==4
  return mi_bsr32(x);
#else
# error "define bsr for non-32 or 64-bit platforms"
#endif
}
|
||||
|
||||
// Number of machine words (`sizeof(uintptr_t)` bytes each) needed to hold
// `size` bytes, rounded up.
// Written as quotient plus a remainder test instead of
// `(size + w - 1) / w`: that sum wraps around for sizes within `w - 1` of
// SIZE_MAX and would report a word count of 0 for such a request.
static inline size_t _mi_wsize_from_size(size_t size) {
  const size_t word = sizeof(uintptr_t);
  return (size / word) + ((size % word) != 0 ? 1 : 0);
}
|
||||
|
||||
// Return the bin for a given field size.
|
||||
// Returns MI_BIN_HUGE if the size is too large.
|
||||
// We use `wsize` for the size in "machine word sizes",
|
||||
// i.e. byte size == `wsize*sizeof(void*)`.
|
||||
extern inline uint8_t _mi_bin8(size_t size) {
|
||||
size_t wsize = _mi_wsize_from_size(size);
|
||||
uint8_t bin;
|
||||
if (wsize <= 1) {
|
||||
bin = 1;
|
||||
}
|
||||
#if defined(MI_ALIGN4W)
|
||||
else if (wsize <= 4) {
|
||||
bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
|
||||
}
|
||||
#elif defined(MI_ALIGN2W)
|
||||
else if (wsize <= 8) {
|
||||
bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
|
||||
}
|
||||
#else
|
||||
else if (wsize <= 8) {
|
||||
bin = (uint8_t)wsize;
|
||||
}
|
||||
#endif
|
||||
else if (wsize > MI_LARGE_WSIZE_MAX) {
|
||||
bin = MI_BIN_HUGE;
|
||||
}
|
||||
else {
|
||||
#if defined(MI_ALIGN4W)
|
||||
if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
|
||||
#endif
|
||||
wsize--;
|
||||
// find the highest bit
|
||||
uint8_t b = mi_bsr32((uint32_t)wsize);
|
||||
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
|
||||
// - adjust with 3 because we use do not round the first 8 sizes
|
||||
// which each get an exact bin
|
||||
bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
|
||||
}
|
||||
return bin;
|
||||
}
|
||||
|
||||
extern inline uint8_t _mi_bin4(size_t size) {
|
||||
size_t wsize = _mi_wsize_from_size(size);
|
||||
uint8_t bin;
|
||||
if (wsize <= 1) {
|
||||
bin = 1;
|
||||
}
|
||||
#if defined(MI_ALIGN4W)
|
||||
else if (wsize <= 4) {
|
||||
bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
|
||||
}
|
||||
#elif defined(MI_ALIGN2W)
|
||||
else if (wsize <= 8) {
|
||||
bin = (uint8_t)((wsize+1)&~1); // round to double word sizes
|
||||
}
|
||||
#else
|
||||
else if (wsize <= 8) {
|
||||
bin = (uint8_t)wsize;
|
||||
}
|
||||
#endif
|
||||
else if (wsize > MI_LARGE_WSIZE_MAX) {
|
||||
bin = MI_BIN_HUGE;
|
||||
}
|
||||
else {
|
||||
uint8_t b = mi_bsr32((uint32_t)wsize);
|
||||
bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3;
|
||||
}
|
||||
return bin;
|
||||
}
|
||||
|
||||
void mi_bins() {
|
||||
printf(" QNULL(1), /* 0 */ \\\n ");
|
||||
size_t last_bin = 0;
|
||||
for (size_t size = 1; size < (MI_INTPTR_SIZE*MI_LARGE_WSIZE_MAX); size++) {
|
||||
size_t bin = _mi_bin4(size);
|
||||
if (bin != last_bin) {
|
||||
size_t wsize = (size-1)/sizeof(intptr_t);
|
||||
// printf("size: %6zd, wsize: %6d, bin: %6zd\n", size - 1, (size-1)/sizeof(intptr_t), last_bin);
|
||||
printf("QNULL(%6zd), ", wsize);
|
||||
if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin);
|
||||
last_bin = bin;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main() {
|
||||
mi_version();
|
||||
mi_bins();
|
||||
void* p1 = malloc(78);
|
||||
void* p2 = malloc(24);
|
||||
free(p1);
|
||||
|
|
Loading…
Add table
Reference in a new issue