From bd5f7de3f416bb8a90d97d0ef1ae6b69ecebbe37 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 2 Dec 2024 20:21:35 -0800 Subject: [PATCH] can run basic test --- src/arena.c | 4 ++-- src/bitmap.c | 30 ++++++++++++++++++++++-------- src/init.c | 2 +- src/page-queue.c | 30 +++++++++++++++++++++++++++++- src/page.c | 9 ++++++--- test/test-stress.c | 4 ++-- 6 files changed, 62 insertions(+), 17 deletions(-) diff --git a/src/arena.c b/src/arena.c index 08b6c98d..317a7e48 100644 --- a/src/arena.c +++ b/src/arena.c @@ -676,7 +676,7 @@ void _mi_arena_page_abandon(mi_page_t* page) { // leave as is; it will be reclaimed when an object is free'd in the page } _mi_page_unown(page); - mi_stat_increase(_mi_stats_main.pages_abandoned, 1); + _mi_stat_increase(&_mi_stats_main.pages_abandoned, 1); } // called from `mi_free` if trying to unabandon an abandoned page @@ -706,7 +706,7 @@ void _mi_arena_page_unabandon(mi_page_t* page) { // nothing to do // TODO: maintain count of these as well? } - mi_stat_decrease(_mi_stats_main.pages_abandoned, 1); + _mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1); } /* diff --git a/src/bitmap.c b/src/bitmap.c index c7c78dec..eb5da086 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -453,6 +453,20 @@ static inline bool mi_bitmap_chunk_try_clearN(mi_bitmap_chunk_t* chunk, size_t c return mi_bitmap_chunk_try_xsetN(MI_BIT_CLEAR, chunk, cidx, n); } +#if defined(__AVX2__) +static inline __m256i mi_mm256_zero(void) { + return _mm256_setzero_si256(); +} +static inline __m256i mi_mm256_ones(void) { + return _mm256_set1_epi64x(~0); +} +static inline bool mi_mm256_is_ones(__m256i vec) { + return _mm256_testc_si256(vec, _mm256_cmpeq_epi32(vec, vec)); +} +static inline bool mi_mm256_is_zero( __m256i vec) { + return _mm256_testz_si256(vec,vec); +} +#endif // find least 0/1-bit in a chunk and try to set/clear it atomically // set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success. @@ -461,7 +475,7 @@ static inline bool mi_bitmap_chunk_find_and_try_xset(mi_bit_t set, mi_bitmap_chu #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) while (true) { const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); - const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? _mm256_set1_epi64x(~0) : _mm256_setzero_si256())); // (elem64 == ~0 / 0 ? 0xFF : 0) + const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? mi_mm256_ones() : mi_mm256_zero())); // (elem64 == ~0 / 0 ? 0xFF : 0) const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits are all set or clear) // mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a zero / one bit (and thus can be set/cleared) if (mask==0) return false; @@ -483,11 +497,11 @@ static inline bool mi_bitmap_chunk_find_and_try_xset(mi_bit_t set, mi_bitmap_chu size_t chunk_idx = 0; #if 1 __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); - if ((set ? _mm256_test_all_ones(vec) : _mm256_testz_si256(vec,vec))) { + if ((set ? mi_mm256_is_ones(vec) : mi_mm256_is_zero(vec))) { chunk_idx += 4; vec = _mm256_load_si256(((const __m256i*)chunk->bfields) + 1); } - const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? _mm256_set1_epi64x(~0) : _mm256_setzero_si256())); // (elem64 == ~0 / 0 ? 0xFF : 0) + const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? mi_mm256_ones() : mi_mm256_zero())); // (elem64 == ~0 / 0 ? 0xFF : 0) const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits are all set or clear) // mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a zero / one bit (and thus can be set/cleared) if (mask==0) return false; @@ -496,7 +510,7 @@ static inline bool mi_bitmap_chunk_find_and_try_xset(mi_bit_t set, mi_bitmap_chu #else const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields); const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1); - const __m256i cmpv = (set ? _mm256_set1_epi64x(~0) : _mm256_setzero_si256()); + const __m256i cmpv = (set ? mi_mm256_ones() : mi_mm256_zero()); const __m256i vcmp1 = _mm256_cmpeq_epi64(vec1, cmpv); // (elem64 == ~0 / 0 ? 0xFF : 0) const __m256i vcmp2 = _mm256_cmpeq_epi64(vec2, cmpv); // (elem64 == ~0 / 0 ? 0xFF : 0) const uint32_t mask1 = ~_mm256_movemask_epi8(vcmp1); // mask of most significant bit of each byte (so each 8 bits are all set or clear) @@ -549,7 +563,7 @@ static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) while(true) { const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); - const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0) + const __m256i vcmp = _mm256_cmpeq_epi8(vec, mi_mm256_ones()); // (byte == ~0 ? -1 : 0) const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte if (mask == 0) return false; const size_t i = _tzcnt_u32(mask); @@ -650,12 +664,12 @@ static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, static inline bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) { #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); - return _mm256_testz_si256( vec, vec ); + return mi_mm256_is_zero(vec); #elif defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==512) const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields); - if (!_mm256_testz_si256(vec1, vec1)) return false; + if (!mi_mm256_is_zero(vec1)) return false; const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1); - return (_mm256_testz_si256(vec2, vec2)); + return (mi_mm256_is_zero(vec2)); #else for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) { if (chunk->bfields[i] != 0) return false; diff --git a/src/init.c b/src/init.c index 05ce54b4..d1670d02 100644 --- a/src/init.c +++ b/src/init.c @@ -34,7 +34,7 @@ const mi_page_t _mi_page_empty = { NULL, // xheap NULL, NULL, // next, prev NULL, // subproc - { {{ NULL, 0}}, false, false, false, MI_MEM_NONE } // memid + { {{ NULL, 0, 0}}, false, false, false, MI_MEM_NONE } // memid }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) diff --git a/src/page-queue.c b/src/page-queue.c index 552e12c3..ad616b1d 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -260,6 +260,34 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ heap->page_count++; } +static void mi_page_queue_push_at_end(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { + mi_assert_internal(mi_page_heap(page) == heap); + mi_assert_internal(!mi_page_queue_contains(queue, page)); + + mi_assert_internal(mi_page_block_size(page) == queue->block_size || + (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) || + (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + + mi_page_set_in_full(page, mi_page_queue_is_full(queue)); + + page->prev = queue->last; + page->next = NULL; + if (queue->last != NULL) { + mi_assert_internal(queue->last->next == NULL); + queue->last->next = page; + queue->last = page; + } + else { + queue->first = queue->last = page; + } + + // update direct + if (queue->first == page) { + mi_heap_queue_first_update(heap, queue); + } + heap->page_count++; +} + static void mi_page_queue_move_to_front(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(mi_page_queue_contains(queue, page)); @@ -344,7 +372,7 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro static void mi_page_queue_enqueue_from_full(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { // note: we could insert at the front to increase reuse, but it slows down certain benchmarks (like `alloc-test`) - mi_page_queue_enqueue_from_ex(to, from, false /* enqueue at the end of the `to` queue? */, page); + mi_page_queue_enqueue_from_ex(to, from, true /* enqueue at the end of the `to` queue? */, page); } // Only called from `mi_heap_absorb`. diff --git a/src/page.c b/src/page.c index 8cdfd6be..4d26dbad 100644 --- a/src/page.c +++ b/src/page.c @@ -274,7 +274,7 @@ void _mi_heap_page_reclaim(mi_heap_t* heap, mi_page_t* page) mi_page_set_heap(page,heap); _mi_page_free_collect(page, false); // ensure used count is up to date mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); - mi_page_queue_push(heap, pq, page); + mi_page_queue_push_at_end(heap, pq, page); mi_assert_expensive(_mi_page_is_valid(page)); } @@ -807,8 +807,11 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p page_candidate = page; candidate_count = 0; } - else if (/* !mi_page_is_expandable(page) && */ page->used >= page_candidate->used) { - if (mi_page_all_free(page_candidate)) { _mi_page_free(page_candidate, pq); } + else if (mi_page_all_free(page_candidate)) { + _mi_page_free(page_candidate, pq); + page_candidate = page; + } + else if (page->used >= page_candidate->used) { page_candidate = page; } // if we find a non-expandable candidate, or searched for N pages, return with the best candidate diff --git a/test/test-stress.c b/test/test-stress.c index 487f7215..ffeb5dea 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -46,7 +46,7 @@ static int SCALE = 100; static int ITER = 50; #else static int THREADS = 32; // more repeatable if THREADS <= #processors -static int SCALE = 25; // scaling factor +static int SCALE = 50; // scaling factor static int ITER = 50; // N full iterations destructing and re-creating all threads #endif @@ -54,7 +54,7 @@ static int ITER = 50; // N full iterations destructing and re-creating a #define STRESS // undefine for leak test -static bool allow_large_objects = true; // allow very large objects? (set to `true` if SCALE>100) +static bool allow_large_objects = false; // allow very large objects? (set to `true` if SCALE>100) static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? static bool main_participates = false; // main thread participates as a worker too