From 47f8caad4db06314d080b798f29b91e25dd51e76 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Feb 2022 17:23:28 -0800 Subject: [PATCH 1/4] improve commit chunk alignment --- src/segment.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/segment.c b/src/segment.c index 0970046d..b000e641 100644 --- a/src/segment.c +++ b/src/segment.c @@ -466,28 +466,35 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); mi_commit_mask_create_empty(cm); if (size == 0 || size > MI_SEGMENT_SIZE || segment->kind == MI_SEGMENT_HUGE) return; + const size_t segstart = mi_segment_info_size(segment); const size_t segsize = mi_segment_size(segment); if (p >= (uint8_t*)segment + segsize) return; - size_t diff = (p - (uint8_t*)segment); - mi_assert_internal(diff + size <= segsize); + size_t pstart = (p - (uint8_t*)segment); + mi_assert_internal(pstart + size <= segsize); size_t start; size_t end; if (conservative) { // decommit conservative - start = _mi_align_up(diff, MI_COMMIT_SIZE); - end = _mi_align_down(diff + size, MI_COMMIT_SIZE); + start = _mi_align_up(pstart, MI_COMMIT_SIZE); + end = _mi_align_down(pstart + size, MI_COMMIT_SIZE); + mi_assert_internal(start >= segstart); + mi_assert_internal(end <= segsize); } else { // commit liberal - start = _mi_align_down(diff, MI_COMMIT_SIZE); - end = _mi_align_up(diff + size, MI_MINIMAL_COMMIT_SIZE); + start = _mi_align_down(pstart, MI_MINIMAL_COMMIT_SIZE); + end = _mi_align_up(pstart + size, MI_MINIMAL_COMMIT_SIZE); + } + if (start < segstart) { + start = segstart; } if (end > segsize) { end = segsize; } + mi_assert_internal(start <= pstart && (pstart + size) <= end); mi_assert_internal(start % MI_COMMIT_SIZE==0 && end % MI_COMMIT_SIZE == 0); *start_p = (uint8_t*)segment + start; *full_size = (end > start ? end - start : 0); @@ -504,14 +511,19 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin mi_commit_mask_create(bitidx, bitcount, cm); } -#define MI_COMMIT_SIZE_BATCH MiB static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - //if (commit && size < MI_COMMIT_SIZE_BATCH && p + MI_COMMIT_SIZE_BATCH <= mi_segment_end(segment)) { - // size = MI_COMMIT_SIZE_BATCH; - // } + // try to commit in at least MI_MINIMAL_COMMIT_SIZE sizes. + /* + if (commit && size > 0) { + const size_t csize = _mi_align_up(size, MI_MINIMAL_COMMIT_SIZE); + if (p + csize <= mi_segment_end(segment)) { + size = csize; + } + } + */ // commit liberal, but decommit conservative uint8_t* start = NULL; size_t full_size = 0; @@ -569,13 +581,13 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ if (mi_commit_mask_is_empty(&mask) || full_size==0) return; // update delayed commit + mi_assert_internal(segment->decommit_expire > 0 || mi_commit_mask_is_empty(&segment->decommit_mask)); mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more mi_commit_mask_set(&segment->decommit_mask, &cmask); mi_msecs_t now = _mi_clock_now(); if (segment->decommit_expire == 0) { // no previous decommits, initialize now - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay); } else if (segment->decommit_expire <= now) { @@ -609,7 +621,8 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st mi_segment_commitx(segment, false, p, size, stats); } } - mi_commit_mask_foreach_end() + mi_commit_mask_foreach_end() + mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); } @@ -893,6 +906,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ mi_assert_internal(mi_commit_mask_is_empty(&decommit_mask)); segment->decommit_expire = 0; mi_commit_mask_create_empty( &segment->decommit_mask ); + mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); } } From f2b6938d64d555f2053612da2e84fcb128bd9116 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 5 Feb 2022 17:36:14 -0800 Subject: [PATCH 2/4] fix start adjustment for the commit mask --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index b000e641..c4cf9875 100644 --- a/src/segment.c +++ b/src/segment.c @@ -487,7 +487,7 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin start = _mi_align_down(pstart, MI_MINIMAL_COMMIT_SIZE); end = _mi_align_up(pstart + size, MI_MINIMAL_COMMIT_SIZE); } - if (start < segstart) { + if (pstart >= segstart && start < segstart) { // note: the mask is also calculated for an initial commit of the info area start = segstart; } if (end > segsize) { From e87b1d2298313f2ec47da0d76dbfc195742126fc Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 10 Feb 2022 11:08:13 -0800 Subject: [PATCH 3/4] add extra huge allocation test --- test/main-override-static.c | 351 +++++++++++++++++++----------------- 1 file changed, 182 insertions(+), 169 deletions(-) diff --git a/test/main-override-static.c b/test/main-override-static.c index afb9131e..07116503 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -8,172 +8,6 @@ #include // redefines malloc etc. -#include -#include - -#define MI_INTPTR_SIZE 8 -#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) - -#define MI_BIN_HUGE 100 -//#define MI_ALIGN2W - -// Bit scan reverse: return the index of the highest bit. -static inline uint8_t mi_bsr32(uint32_t x); - -#if defined(_MSC_VER) -#include -#include -static inline uint8_t mi_bsr32(uint32_t x) { - uint32_t idx; - _BitScanReverse((DWORD*)&idx, x); - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -static inline uint8_t mi_bsr32(uint32_t x) { - return (31 - __builtin_clz(x)); -} -#else -static inline uint8_t mi_bsr32(uint32_t x) { - // de Bruijn multiplication, see - static const uint8_t debruijn[32] = { - 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, - 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, - }; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x++; - return debruijn[(x*0x076be629) >> 27]; -} -#endif - -/* -// Bit scan reverse: return the index of the highest bit. -uint8_t _mi_bsr(uintptr_t x) { - if (x == 0) return 0; - #if MI_INTPTR_SIZE==8 - uint32_t hi = (x >> 32); - return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); - #elif MI_INTPTR_SIZE==4 - return mi_bsr32(x); - #else - # error "define bsr for non-32 or 64-bit platforms" - #endif -} -*/ - - -static inline size_t _mi_wsize_from_size(size_t size) { - return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); -} - -// Return the bin for a given field size. -// Returns MI_BIN_HUGE if the size is too large. -// We use `wsize` for the size in "machine word sizes", -// i.e. byte size == `wsize*sizeof(void*)`. -extern inline uint8_t _mi_bin8(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { - bin = 1; - } - #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; - } - #endif - else if (wsize > MI_LARGE_WSIZE_MAX) { - bin = MI_BIN_HUGE; - } - else { - #if defined(MI_ALIGN4W) - if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes - #endif - wsize--; - // find the highest bit - uint8_t b = mi_bsr32((uint32_t)wsize); - // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). - // - adjust with 3 because we use do not round the first 8 sizes - // which each get an exact bin - bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; - } - return bin; -} - -extern inline uint8_t _mi_bin4(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { - bin = 1; - } - #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; - } - #endif - else if (wsize > MI_LARGE_WSIZE_MAX) { - bin = MI_BIN_HUGE; - } - else { - uint8_t b = mi_bsr32((uint32_t)wsize); - bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; - } - return bin; -} - -size_t _mi_binx4(size_t bsize) { - if (bsize==0) return 0; - uint8_t b = mi_bsr32((uint32_t)bsize); - if (b <= 1) return bsize; - size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01); - return bin; -} - -size_t _mi_binx8(size_t bsize) { - if (bsize<=1) return bsize; - uint8_t b = mi_bsr32((uint32_t)bsize); - if (b <= 2) return bsize; - size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; - return bin; -} - -void mi_bins() { - //printf(" QNULL(1), /* 0 */ \\\n "); - size_t last_bin = 0; - size_t min_bsize = 0; - size_t last_bsize = 0; - for (size_t bsize = 1; bsize < 2*1024; bsize++) { - size_t size = bsize * 64 * 1024; - size_t bin = _mi_binx8(bsize); - if (bin != last_bin) { - printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin); - //printf("QNULL(%6zd), ", wsize); - //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); - last_bin = bin; - min_bsize = bsize; - } - last_bsize = bsize; - } -} - static void double_free1(); static void double_free2(); static void corrupt_free(); @@ -183,7 +17,7 @@ static void test_aslr(void); static void test_process_info(void); static void test_reserved(void); static void negative_stat(void); - +static void alloc_huge(void); int main() { mi_version(); @@ -197,6 +31,7 @@ int main() { // invalid_free(); // test_reserved(); // negative_stat(); + alloc_huge(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -210,7 +45,7 @@ int main() { free(p1); free(p2); free(s); - + /* now test if override worked by allocating/freeing across the api's*/ //p1 = mi_malloc(32); //free(p1); @@ -347,4 +182,182 @@ static void negative_stat(void) { *p = 100; mi_free(p); mi_stats_print_out(NULL, NULL); -} \ No newline at end of file +} + +static void alloc_huge(void) { + void* p = mi_malloc(67108872); + mi_free(p); +} + + +// ---------------------------- +// bin size experiments +// ------------------------------ + +#if 0 +#include +#include + +#define MI_INTPTR_SIZE 8 +#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) + +#define MI_BIN_HUGE 100 +//#define MI_ALIGN2W + +// Bit scan reverse: return the index of the highest bit. +static inline uint8_t mi_bsr32(uint32_t x); + +#if defined(_MSC_VER) +#include +#include +static inline uint8_t mi_bsr32(uint32_t x) { + uint32_t idx; + _BitScanReverse((DWORD*)&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +static inline uint8_t mi_bsr32(uint32_t x) { + return (31 - __builtin_clz(x)); +} +#else +static inline uint8_t mi_bsr32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, + 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, + }; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return debruijn[(x*0x076be629) >> 27]; +} +#endif + +/* +// Bit scan reverse: return the index of the highest bit. +uint8_t _mi_bsr(uintptr_t x) { + if (x == 0) return 0; + #if MI_INTPTR_SIZE==8 + uint32_t hi = (x >> 32); + return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); + #elif MI_INTPTR_SIZE==4 + return mi_bsr32(x); + #else + # error "define bsr for non-32 or 64-bit platforms" + #endif +} +*/ + + +static inline size_t _mi_wsize_from_size(size_t size) { + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); +} + +// Return the bin for a given field size. +// Returns MI_BIN_HUGE if the size is too large. +// We use `wsize` for the size in "machine word sizes", +// i.e. byte size == `wsize*sizeof(void*)`. +extern inline uint8_t _mi_bin8(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } +#if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } +#endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { +#if defined(MI_ALIGN4W) + if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes +#endif + wsize--; + // find the highest bit + uint8_t b = mi_bsr32((uint32_t)wsize); + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). + // - adjust with 3 because we use do not round the first 8 sizes + // which each get an exact bin + bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + } + return bin; +} + +static inline uint8_t _mi_bin4(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } +#if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } +#endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { + uint8_t b = mi_bsr32((uint32_t)wsize); + bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; + } + return bin; +} + +static size_t _mi_binx4(size_t bsize) { + if (bsize==0) return 0; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 1) return bsize; + size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01); + return bin; +} + +static size_t _mi_binx8(size_t bsize) { + if (bsize<=1) return bsize; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 2) return bsize; + size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; + return bin; +} + +static void mi_bins(void) { + //printf(" QNULL(1), /* 0 */ \\\n "); + size_t last_bin = 0; + size_t min_bsize = 0; + size_t last_bsize = 0; + for (size_t bsize = 1; bsize < 2*1024; bsize++) { + size_t size = bsize * 64 * 1024; + size_t bin = _mi_binx8(bsize); + if (bin != last_bin) { + printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin); + //printf("QNULL(%6zd), ", wsize); + //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); + last_bin = bin; + min_bsize = bsize; + } + last_bsize = bsize; + } +} +#endif \ No newline at end of file From 96008c55d0add668dbb09d135f6ca18a2f6a322e Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 10 Feb 2022 11:57:30 -0800 Subject: [PATCH 4/4] fix ubsan warning on huge allocations (issue #543) --- src/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index c4cf9875..8d3eebe5 100644 --- a/src/segment.c +++ b/src/segment.c @@ -762,7 +762,8 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i } // and also for the last one (if not set already) (the last one is needed for coalescing) - mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; + // note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543) + mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_index + slice_count - 1]; if (last < mi_segment_slices_end(segment) && last >= slice) { last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_count-1)); last->slice_count = 0;