can run basic test

This commit is contained in:
daanx 2024-12-02 20:21:35 -08:00
parent fe5a314114
commit bd5f7de3f4
6 changed files with 62 additions and 17 deletions

View file

@ -676,7 +676,7 @@ void _mi_arena_page_abandon(mi_page_t* page) {
// leave as is; it will be reclaimed when an object is free'd in the page // leave as is; it will be reclaimed when an object is free'd in the page
} }
_mi_page_unown(page); _mi_page_unown(page);
mi_stat_increase(_mi_stats_main.pages_abandoned, 1); _mi_stat_increase(&_mi_stats_main.pages_abandoned, 1);
} }
// called from `mi_free` if trying to unabandon an abandoned page // called from `mi_free` if trying to unabandon an abandoned page
@ -706,7 +706,7 @@ void _mi_arena_page_unabandon(mi_page_t* page) {
// nothing to do // nothing to do
// TODO: maintain count of these as well? // TODO: maintain count of these as well?
} }
mi_stat_decrease(_mi_stats_main.pages_abandoned, 1); _mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1);
} }
/* /*

View file

@ -453,6 +453,20 @@ static inline bool mi_bitmap_chunk_try_clearN(mi_bitmap_chunk_t* chunk, size_t c
return mi_bitmap_chunk_try_xsetN(MI_BIT_CLEAR, chunk, cidx, n); return mi_bitmap_chunk_try_xsetN(MI_BIT_CLEAR, chunk, cidx, n);
} }
#if defined(__AVX2__)
static inline __m256i mi_mm256_zero(void) {
return _mm256_setzero_si256();
}
static inline __m256i mi_mm256_ones(void) {
return _mm256_set1_epi64x(~0);
}
static inline bool mi_mm256_is_ones(__m256i vec) {
return _mm256_testc_si256(vec, _mm256_cmpeq_epi32(vec, vec));
}
static inline bool mi_mm256_is_zero( __m256i vec) {
return _mm256_testz_si256(vec,vec);
}
#endif
// find least 0/1-bit in a chunk and try to set/clear it atomically // find least 0/1-bit in a chunk and try to set/clear it atomically
// set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success. // set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
@ -461,7 +475,7 @@ static inline bool mi_bitmap_chunk_find_and_try_xset(mi_bit_t set, mi_bitmap_chu
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
while (true) { while (true) {
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? _mm256_set1_epi64x(~0) : _mm256_setzero_si256())); // (elem64 == ~0 / 0 ? 0xFF : 0) const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? mi_mm256_ones() : mi_mm256_zero())); // (elem64 == ~0 / 0 ? 0xFF : 0)
const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits are all set or clear) const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits are all set or clear)
// mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a zero / one bit (and thus can be set/cleared) // mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a zero / one bit (and thus can be set/cleared)
if (mask==0) return false; if (mask==0) return false;
@ -483,11 +497,11 @@ static inline bool mi_bitmap_chunk_find_and_try_xset(mi_bit_t set, mi_bitmap_chu
size_t chunk_idx = 0; size_t chunk_idx = 0;
#if 1 #if 1
__m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
if ((set ? _mm256_test_all_ones(vec) : _mm256_testz_si256(vec,vec))) { if ((set ? mi_mm256_is_ones(vec) : mi_mm256_is_zero(vec))) {
chunk_idx += 4; chunk_idx += 4;
vec = _mm256_load_si256(((const __m256i*)chunk->bfields) + 1); vec = _mm256_load_si256(((const __m256i*)chunk->bfields) + 1);
} }
const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? _mm256_set1_epi64x(~0) : _mm256_setzero_si256())); // (elem64 == ~0 / 0 ? 0xFF : 0) const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? mi_mm256_ones() : mi_mm256_zero())); // (elem64 == ~0 / 0 ? 0xFF : 0)
const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits are all set or clear) const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits are all set or clear)
// mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a zero / one bit (and thus can be set/cleared) // mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a zero / one bit (and thus can be set/cleared)
if (mask==0) return false; if (mask==0) return false;
@ -496,7 +510,7 @@ static inline bool mi_bitmap_chunk_find_and_try_xset(mi_bit_t set, mi_bitmap_chu
#else #else
const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields); const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields);
const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1); const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
const __m256i cmpv = (set ? _mm256_set1_epi64x(~0) : _mm256_setzero_si256()); const __m256i cmpv = (set ? mi_mm256_ones() : mi_mm256_zero());
const __m256i vcmp1 = _mm256_cmpeq_epi64(vec1, cmpv); // (elem64 == ~0 / 0 ? 0xFF : 0) const __m256i vcmp1 = _mm256_cmpeq_epi64(vec1, cmpv); // (elem64 == ~0 / 0 ? 0xFF : 0)
const __m256i vcmp2 = _mm256_cmpeq_epi64(vec2, cmpv); // (elem64 == ~0 / 0 ? 0xFF : 0) const __m256i vcmp2 = _mm256_cmpeq_epi64(vec2, cmpv); // (elem64 == ~0 / 0 ? 0xFF : 0)
const uint32_t mask1 = ~_mm256_movemask_epi8(vcmp1); // mask of most significant bit of each byte (so each 8 bits are all set or clear) const uint32_t mask1 = ~_mm256_movemask_epi8(vcmp1); // mask of most significant bit of each byte (so each 8 bits are all set or clear)
@ -549,7 +563,7 @@ static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk,
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
while(true) { while(true) {
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0) const __m256i vcmp = _mm256_cmpeq_epi8(vec, mi_mm256_ones()); // (byte == ~0 ? -1 : 0)
const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte
if (mask == 0) return false; if (mask == 0) return false;
const size_t i = _tzcnt_u32(mask); const size_t i = _tzcnt_u32(mask);
@ -650,12 +664,12 @@ static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk,
static inline bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) { static inline bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
return _mm256_testz_si256( vec, vec ); return mi_mm256_is_zero(vec);
#elif defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==512) #elif defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==512)
const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields); const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields);
if (!_mm256_testz_si256(vec1, vec1)) return false; if (!mi_mm256_is_zero(vec1)) return false;
const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1); const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
return (_mm256_testz_si256(vec2, vec2)); return (mi_mm256_is_zero(vec2));
#else #else
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) { for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
if (chunk->bfields[i] != 0) return false; if (chunk->bfields[i] != 0) return false;

View file

@ -34,7 +34,7 @@ const mi_page_t _mi_page_empty = {
NULL, // xheap NULL, // xheap
NULL, NULL, // next, prev NULL, NULL, // next, prev
NULL, // subproc NULL, // subproc
{ {{ NULL, 0}}, false, false, false, MI_MEM_NONE } // memid { {{ NULL, 0, 0}}, false, false, false, MI_MEM_NONE } // memid
}; };
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)

View file

@ -260,6 +260,34 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
heap->page_count++; heap->page_count++;
} }
static void mi_page_queue_push_at_end(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(!mi_page_queue_contains(queue, page));
mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
(mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_page_set_in_full(page, mi_page_queue_is_full(queue));
page->prev = queue->last;
page->next = NULL;
if (queue->last != NULL) {
mi_assert_internal(queue->last->next == NULL);
queue->last->next = page;
queue->last = page;
}
else {
queue->first = queue->last = page;
}
// update direct
if (queue->first == page) {
mi_heap_queue_first_update(heap, queue);
}
heap->page_count++;
}
static void mi_page_queue_move_to_front(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { static void mi_page_queue_move_to_front(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(mi_page_queue_contains(queue, page)); mi_assert_internal(mi_page_queue_contains(queue, page));
@ -344,7 +372,7 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
static void mi_page_queue_enqueue_from_full(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { static void mi_page_queue_enqueue_from_full(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) {
// note: we could insert at the front to increase reuse, but it slows down certain benchmarks (like `alloc-test`) // note: we could insert at the front to increase reuse, but it slows down certain benchmarks (like `alloc-test`)
mi_page_queue_enqueue_from_ex(to, from, false /* enqueue at the end of the `to` queue? */, page); mi_page_queue_enqueue_from_ex(to, from, true /* enqueue at the end of the `to` queue? */, page);
} }
// Only called from `mi_heap_absorb`. // Only called from `mi_heap_absorb`.

View file

@ -274,7 +274,7 @@ void _mi_heap_page_reclaim(mi_heap_t* heap, mi_page_t* page)
mi_page_set_heap(page,heap); mi_page_set_heap(page,heap);
_mi_page_free_collect(page, false); // ensure used count is up to date _mi_page_free_collect(page, false); // ensure used count is up to date
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
mi_page_queue_push(heap, pq, page); mi_page_queue_push_at_end(heap, pq, page);
mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_expensive(_mi_page_is_valid(page));
} }
@ -807,8 +807,11 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
page_candidate = page; page_candidate = page;
candidate_count = 0; candidate_count = 0;
} }
else if (/* !mi_page_is_expandable(page) && */ page->used >= page_candidate->used) { else if (mi_page_all_free(page_candidate)) {
if (mi_page_all_free(page_candidate)) { _mi_page_free(page_candidate, pq); } _mi_page_free(page_candidate, pq);
page_candidate = page;
}
else if (page->used >= page_candidate->used) {
page_candidate = page; page_candidate = page;
} }
// if we find a non-expandable candidate, or searched for N pages, return with the best candidate // if we find a non-expandable candidate, or searched for N pages, return with the best candidate

View file

@ -46,7 +46,7 @@ static int SCALE = 100;
static int ITER = 50; static int ITER = 50;
#else #else
static int THREADS = 32; // more repeatable if THREADS <= #processors static int THREADS = 32; // more repeatable if THREADS <= #processors
static int SCALE = 25; // scaling factor static int SCALE = 50; // scaling factor
static int ITER = 50; // N full iterations destructing and re-creating all threads static int ITER = 50; // N full iterations destructing and re-creating all threads
#endif #endif
@ -54,7 +54,7 @@ static int ITER = 50; // N full iterations destructing and re-creating a
#define STRESS // undefine for leak test #define STRESS // undefine for leak test
static bool allow_large_objects = true; // allow very large objects? (set to `true` if SCALE>100) static bool allow_large_objects = false; // allow very large objects? (set to `true` if SCALE>100)
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
static bool main_participates = false; // main thread participates as a worker too static bool main_participates = false; // main thread participates as a worker too