add start offset to pages to reduce cache/page effects

Author: Daan Leijen
Date:   2021-10-02 11:13:00 -07:00
Commit: e6b58052da (parent 262022c1d1)

4 changed files with 15 additions and 9 deletions
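In short: pages holding small blocks no longer begin exactly at an OS-page-aligned slice boundary; their usable memory is shifted by a fixed MI_MAX_ALIGN_GUARANTEE bytes so block starts stop coinciding with OS page boundaries. A minimal standalone sketch of that decision, under assumed constants (MI_MAX_ALIGN_SIZE taken as 16, so the guarantee is 128 bytes; the helper name and constants below are ours, not mimalloc's):

#include <stdint.h>
#include <stddef.h>

#define MAX_ALIGN_SIZE       16                    // assumed value of MI_MAX_ALIGN_SIZE
#define MAX_ALIGN_GUARANTEE  (8 * MAX_ALIGN_SIZE)  // mirrors the new MI_MAX_ALIGN_GUARANTEE

// Shift the page start for "small" block sizes only; zero-sized and
// huge spans keep their slice-aligned start.
static size_t page_start_offset(size_t block_size) {
  return (block_size >= sizeof(intptr_t) && block_size <= 1024)
         ? (size_t)MAX_ALIGN_GUARANTEE : 0;
}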

@@ -145,6 +145,10 @@ terms of the MIT license. A copy of the license can be found in the file
 // Used as a special value to encode block sizes in 32 bits.
 #define MI_HUGE_BLOCK_SIZE   ((uint32_t)MI_HUGE_OBJ_SIZE_MAX)
+
+// blocks up to this size are always allocated aligned
+#define MI_MAX_ALIGN_GUARANTEE  (8*MI_MAX_ALIGN_SIZE)
+
 // The free lists use encoded next fields
 // (Only actually encodes when MI_ENCODED_FREELIST is defined.)
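Assuming the usual MI_MAX_ALIGN_SIZE of 16, the new guarantee evaluates to 128 bytes: blocks up to 128 bytes keep their natural alignment even though page starts are now shifted by that same amount. A quick compile-time check of the arithmetic (the _ASSUMED names are ours, not mimalloc's):

#define MI_MAX_ALIGN_SIZE_ASSUMED      16
#define MI_MAX_ALIGN_GUARANTEE_ASSUMED (8 * MI_MAX_ALIGN_SIZE_ASSUMED)
_Static_assert(MI_MAX_ALIGN_GUARANTEE_ASSUMED == 128,
               "blocks up to 128 bytes stay naturally aligned");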

@@ -41,7 +41,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
   }
   // use regular allocation if it is guaranteed to fit the alignment constraints
-  if (offset==0 && alignment<=padsize && padsize<=MI_MEDIUM_OBJ_SIZE_MAX && (padsize&align_mask)==0) {
+  if (offset==0 && alignment<=padsize && padsize<=MI_MAX_ALIGN_GUARANTEE && (padsize&align_mask)==0) {
     void* p = _mi_heap_malloc_zero(heap, size, zero);
     mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
     return p;
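The fast path narrows here: it used to accept any padded size up to MI_MEDIUM_OBJ_SIZE_MAX, but with the start offset in place natural alignment is only guaranteed up to MI_MAX_ALIGN_GUARANTEE. An illustration with the public API (sizes are illustrative; whether the fast path actually fires also depends on the internal padding added to `size`):

#include <mimalloc.h>

void example(void) {
  void* a = mi_malloc_aligned(48, 16);     // small and 48 % 16 == 0: plain allocation suffices
  void* b = mi_malloc_aligned(4096, 4096); // beyond the 128-byte guarantee: general aligned path
  mi_free(a);
  mi_free(b);
}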

@@ -593,7 +593,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
     // ensure we don't touch memory beyond the page to reduce page commit.
     // the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%.
     extend = (max_extend==0 ? 1 : max_extend);
   }
   mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved);
   mi_assert_internal(extend < (1UL<<16));
@@ -624,9 +624,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   mi_assert_internal(block_size > 0);
   // set fields
   mi_page_set_heap(page, heap);
+  page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start
   size_t page_size;
   _mi_segment_page_start(segment, page, &page_size);
-  page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE);
   mi_assert_internal(mi_page_block_size(page) <= page_size);
   mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE);
   mi_assert_internal(page_size / block_size < (1L<<16));
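The reordering matters because _mi_segment_page_start now reads page->xblock_size to pick the start offset; calling it before the field is set would compute offset 0 and an over-stated page_size. A small sketch of that dependency (hypothetical helper with an assumed 128-byte guarantee, not mimalloc's code):

#include <stddef.h>
#include <stdint.h>

static size_t offset_for(uint32_t xblock_size) {
  return (xblock_size >= sizeof(intptr_t) && xblock_size <= 1024) ? 128 : 0;
}
// offset_for(0)  == 0    -> what the old call order would have used
// offset_for(64) == 128  -> the intended offset once xblock_size is set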

@@ -184,20 +184,22 @@ static size_t mi_segment_info_size(mi_segment_t* segment) {
   return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE;
 }
 
-static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t* page_size)
+static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, const mi_slice_t* slice, size_t xblock_size, size_t* page_size)
 {
   ptrdiff_t idx = slice - segment->slices;
   size_t psize = slice->slice_count*MI_SEGMENT_SLICE_SIZE;
-  if (page_size != NULL) *page_size = psize;
-  return (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE);
+  // make the start not OS page aligned for smaller blocks to avoid page/cache effects
+  size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? MI_MAX_ALIGN_GUARANTEE : 0);
+  if (page_size != NULL) *page_size = psize - start_offset;
+  return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset);
 }
 
 // Start of the page available memory; can be used on uninitialized pages
 uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size)
 {
   const mi_slice_t* slice = mi_page_to_slice((mi_page_t*)page);
-  uint8_t* p = _mi_segment_page_start_from_slice(segment, slice, page_size);
-  mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page);
+  uint8_t* p = _mi_segment_page_start_from_slice(segment, slice, page->xblock_size, page_size);
+  mi_assert_internal(page->xblock_size > 0 || _mi_ptr_page(p) == page);
   mi_assert_internal(_mi_ptr_segment(p) == segment);
   return p;
 }
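A worked example of the new computation, under assumed sizes (a 64 KiB MI_SEGMENT_SLICE_SIZE and a 128-byte MI_MAX_ALIGN_GUARANTEE; this is an illustration, not mimalloc's code):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

int main(void) {
  const size_t slice_size = 65536;  // assumed MI_SEGMENT_SLICE_SIZE
  const size_t idx = 2, slice_count = 1, xblock_size = 64;
  size_t psize        = slice_count * slice_size;
  size_t start_offset = (xblock_size >= sizeof(intptr_t) && xblock_size <= 1024) ? 128 : 0;
  size_t page_size    = psize - start_offset;            // 65408 usable bytes
  size_t start        = idx * slice_size + start_offset; // byte offset inside the segment
  assert(page_size == 65408);
  assert(start % 4096 != 0);  // no longer OS-page aligned
  assert(start % 128  == 0);  // the alignment guarantee still holds
  return 0;
}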
@@ -556,7 +558,7 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i
   mi_assert_internal(slice->xblock_size==0 || slice->xblock_size==1);
   // commit before changing the slice data
-  if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) {
+  if (!mi_segment_ensure_committed(segment, _mi_segment_page_start_from_slice(segment, slice, 0, NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats)) {
    return NULL; // commit failed!
   }
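Passing 0 for xblock_size here yields a zero start offset, so the commit range starts at the true slice boundary and covers the offset region as well. As an end-to-end sanity check, small allocations from a fresh heap need not sit on OS-page boundaries (illustrative only; actual addresses depend on heap state):

#include <mimalloc.h>
#include <stdio.h>
#include <stdint.h>

int main(void) {
  void* p = mi_malloc(64);
  printf("%p (mod 4096 = %zu)\n", p, (size_t)((uintptr_t)p % 4096));
  mi_free(p);
  return 0;
}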