mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-05-07 15:59:32 +03:00
can run basic test
This commit is contained in:
parent
fe5a314114
commit
bd5f7de3f4
6 changed files with 62 additions and 17 deletions
|
@ -676,7 +676,7 @@ void _mi_arena_page_abandon(mi_page_t* page) {
|
||||||
// leave as is; it will be reclaimed when an object is free'd in the page
|
// leave as is; it will be reclaimed when an object is free'd in the page
|
||||||
}
|
}
|
||||||
_mi_page_unown(page);
|
_mi_page_unown(page);
|
||||||
mi_stat_increase(_mi_stats_main.pages_abandoned, 1);
|
_mi_stat_increase(&_mi_stats_main.pages_abandoned, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// called from `mi_free` if trying to unabandon an abandoned page
|
// called from `mi_free` if trying to unabandon an abandoned page
|
||||||
|
@ -706,7 +706,7 @@ void _mi_arena_page_unabandon(mi_page_t* page) {
|
||||||
// nothing to do
|
// nothing to do
|
||||||
// TODO: maintain count of these as well?
|
// TODO: maintain count of these as well?
|
||||||
}
|
}
|
||||||
mi_stat_decrease(_mi_stats_main.pages_abandoned, 1);
|
_mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
30
src/bitmap.c
30
src/bitmap.c
|
@ -453,6 +453,20 @@ static inline bool mi_bitmap_chunk_try_clearN(mi_bitmap_chunk_t* chunk, size_t c
|
||||||
return mi_bitmap_chunk_try_xsetN(MI_BIT_CLEAR, chunk, cidx, n);
|
return mi_bitmap_chunk_try_xsetN(MI_BIT_CLEAR, chunk, cidx, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
static inline __m256i mi_mm256_zero(void) {
|
||||||
|
return _mm256_setzero_si256();
|
||||||
|
}
|
||||||
|
static inline __m256i mi_mm256_ones(void) {
|
||||||
|
return _mm256_set1_epi64x(~0);
|
||||||
|
}
|
||||||
|
static inline bool mi_mm256_is_ones(__m256i vec) {
|
||||||
|
return _mm256_testc_si256(vec, _mm256_cmpeq_epi32(vec, vec));
|
||||||
|
}
|
||||||
|
static inline bool mi_mm256_is_zero( __m256i vec) {
|
||||||
|
return _mm256_testz_si256(vec,vec);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// find least 0/1-bit in a chunk and try to set/clear it atomically
|
// find least 0/1-bit in a chunk and try to set/clear it atomically
|
||||||
// set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
|
// set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
|
||||||
|
@ -461,7 +475,7 @@ static inline bool mi_bitmap_chunk_find_and_try_xset(mi_bit_t set, mi_bitmap_chu
|
||||||
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
|
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
|
||||||
while (true) {
|
while (true) {
|
||||||
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
|
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
|
||||||
const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? _mm256_set1_epi64x(~0) : _mm256_setzero_si256())); // (elem64 == ~0 / 0 ? 0xFF : 0)
|
const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? mi_mm256_ones() : mi_mm256_zero())); // (elem64 == ~0 / 0 ? 0xFF : 0)
|
||||||
const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits are all set or clear)
|
const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits are all set or clear)
|
||||||
// mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a zero / one bit (and thus can be set/cleared)
|
// mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a zero / one bit (and thus can be set/cleared)
|
||||||
if (mask==0) return false;
|
if (mask==0) return false;
|
||||||
|
@ -483,11 +497,11 @@ static inline bool mi_bitmap_chunk_find_and_try_xset(mi_bit_t set, mi_bitmap_chu
|
||||||
size_t chunk_idx = 0;
|
size_t chunk_idx = 0;
|
||||||
#if 1
|
#if 1
|
||||||
__m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
|
__m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
|
||||||
if ((set ? _mm256_test_all_ones(vec) : _mm256_testz_si256(vec,vec))) {
|
if ((set ? mi_mm256_is_ones(vec) : mi_mm256_is_zero(vec))) {
|
||||||
chunk_idx += 4;
|
chunk_idx += 4;
|
||||||
vec = _mm256_load_si256(((const __m256i*)chunk->bfields) + 1);
|
vec = _mm256_load_si256(((const __m256i*)chunk->bfields) + 1);
|
||||||
}
|
}
|
||||||
const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? _mm256_set1_epi64x(~0) : _mm256_setzero_si256())); // (elem64 == ~0 / 0 ? 0xFF : 0)
|
const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? mi_mm256_ones() : mi_mm256_zero())); // (elem64 == ~0 / 0 ? 0xFF : 0)
|
||||||
const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits are all set or clear)
|
const uint32_t mask = ~_mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte (so each 8 bits are all set or clear)
|
||||||
// mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a zero / one bit (and thus can be set/cleared)
|
// mask is inverted, so each 8-bits is 0xFF iff the corresponding elem64 has a zero / one bit (and thus can be set/cleared)
|
||||||
if (mask==0) return false;
|
if (mask==0) return false;
|
||||||
|
@ -496,7 +510,7 @@ static inline bool mi_bitmap_chunk_find_and_try_xset(mi_bit_t set, mi_bitmap_chu
|
||||||
#else
|
#else
|
||||||
const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields);
|
const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields);
|
||||||
const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
|
const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
|
||||||
const __m256i cmpv = (set ? _mm256_set1_epi64x(~0) : _mm256_setzero_si256());
|
const __m256i cmpv = (set ? mi_mm256_ones() : mi_mm256_zero());
|
||||||
const __m256i vcmp1 = _mm256_cmpeq_epi64(vec1, cmpv); // (elem64 == ~0 / 0 ? 0xFF : 0)
|
const __m256i vcmp1 = _mm256_cmpeq_epi64(vec1, cmpv); // (elem64 == ~0 / 0 ? 0xFF : 0)
|
||||||
const __m256i vcmp2 = _mm256_cmpeq_epi64(vec2, cmpv); // (elem64 == ~0 / 0 ? 0xFF : 0)
|
const __m256i vcmp2 = _mm256_cmpeq_epi64(vec2, cmpv); // (elem64 == ~0 / 0 ? 0xFF : 0)
|
||||||
const uint32_t mask1 = ~_mm256_movemask_epi8(vcmp1); // mask of most significant bit of each byte (so each 8 bits are all set or clear)
|
const uint32_t mask1 = ~_mm256_movemask_epi8(vcmp1); // mask of most significant bit of each byte (so each 8 bits are all set or clear)
|
||||||
|
@ -549,7 +563,7 @@ static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk,
|
||||||
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
|
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
|
||||||
while(true) {
|
while(true) {
|
||||||
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
|
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
|
||||||
const __m256i vcmp = _mm256_cmpeq_epi8(vec, _mm256_set1_epi64x(~0)); // (byte == ~0 ? -1 : 0)
|
const __m256i vcmp = _mm256_cmpeq_epi8(vec, mi_mm256_ones()); // (byte == ~0 ? -1 : 0)
|
||||||
const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte
|
const uint32_t mask = _mm256_movemask_epi8(vcmp); // mask of most significant bit of each byte
|
||||||
if (mask == 0) return false;
|
if (mask == 0) return false;
|
||||||
const size_t i = _tzcnt_u32(mask);
|
const size_t i = _tzcnt_u32(mask);
|
||||||
|
@ -650,12 +664,12 @@ static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk,
|
||||||
static inline bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
|
static inline bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
|
||||||
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
|
#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
|
||||||
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
|
const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
|
||||||
return _mm256_testz_si256( vec, vec );
|
return mi_mm256_is_zero(vec);
|
||||||
#elif defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==512)
|
#elif defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==512)
|
||||||
const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields);
|
const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields);
|
||||||
if (!_mm256_testz_si256(vec1, vec1)) return false;
|
if (!mi_mm256_is_zero(vec1)) return false;
|
||||||
const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
|
const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
|
||||||
return (_mm256_testz_si256(vec2, vec2));
|
return (mi_mm256_is_zero(vec2));
|
||||||
#else
|
#else
|
||||||
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
|
for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
|
||||||
if (chunk->bfields[i] != 0) return false;
|
if (chunk->bfields[i] != 0) return false;
|
||||||
|
|
|
@ -34,7 +34,7 @@ const mi_page_t _mi_page_empty = {
|
||||||
NULL, // xheap
|
NULL, // xheap
|
||||||
NULL, NULL, // next, prev
|
NULL, NULL, // next, prev
|
||||||
NULL, // subproc
|
NULL, // subproc
|
||||||
{ {{ NULL, 0}}, false, false, false, MI_MEM_NONE } // memid
|
{ {{ NULL, 0, 0}}, false, false, false, MI_MEM_NONE } // memid
|
||||||
};
|
};
|
||||||
|
|
||||||
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
|
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
|
||||||
|
|
|
@ -260,6 +260,34 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
|
||||||
heap->page_count++;
|
heap->page_count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void mi_page_queue_push_at_end(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
|
||||||
|
mi_assert_internal(mi_page_heap(page) == heap);
|
||||||
|
mi_assert_internal(!mi_page_queue_contains(queue, page));
|
||||||
|
|
||||||
|
mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
|
||||||
|
(mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) ||
|
||||||
|
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
|
||||||
|
|
||||||
|
mi_page_set_in_full(page, mi_page_queue_is_full(queue));
|
||||||
|
|
||||||
|
page->prev = queue->last;
|
||||||
|
page->next = NULL;
|
||||||
|
if (queue->last != NULL) {
|
||||||
|
mi_assert_internal(queue->last->next == NULL);
|
||||||
|
queue->last->next = page;
|
||||||
|
queue->last = page;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
queue->first = queue->last = page;
|
||||||
|
}
|
||||||
|
|
||||||
|
// update direct
|
||||||
|
if (queue->first == page) {
|
||||||
|
mi_heap_queue_first_update(heap, queue);
|
||||||
|
}
|
||||||
|
heap->page_count++;
|
||||||
|
}
|
||||||
|
|
||||||
static void mi_page_queue_move_to_front(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
|
static void mi_page_queue_move_to_front(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
|
||||||
mi_assert_internal(mi_page_heap(page) == heap);
|
mi_assert_internal(mi_page_heap(page) == heap);
|
||||||
mi_assert_internal(mi_page_queue_contains(queue, page));
|
mi_assert_internal(mi_page_queue_contains(queue, page));
|
||||||
|
@ -344,7 +372,7 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
|
||||||
|
|
||||||
static void mi_page_queue_enqueue_from_full(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) {
|
static void mi_page_queue_enqueue_from_full(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) {
|
||||||
// note: we could insert at the front to increase reuse, but it slows down certain benchmarks (like `alloc-test`)
|
// note: we could insert at the front to increase reuse, but it slows down certain benchmarks (like `alloc-test`)
|
||||||
mi_page_queue_enqueue_from_ex(to, from, false /* enqueue at the end of the `to` queue? */, page);
|
mi_page_queue_enqueue_from_ex(to, from, true /* enqueue at the end of the `to` queue? */, page);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only called from `mi_heap_absorb`.
|
// Only called from `mi_heap_absorb`.
|
||||||
|
|
|
@ -274,7 +274,7 @@ void _mi_heap_page_reclaim(mi_heap_t* heap, mi_page_t* page)
|
||||||
mi_page_set_heap(page,heap);
|
mi_page_set_heap(page,heap);
|
||||||
_mi_page_free_collect(page, false); // ensure used count is up to date
|
_mi_page_free_collect(page, false); // ensure used count is up to date
|
||||||
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
|
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
|
||||||
mi_page_queue_push(heap, pq, page);
|
mi_page_queue_push_at_end(heap, pq, page);
|
||||||
mi_assert_expensive(_mi_page_is_valid(page));
|
mi_assert_expensive(_mi_page_is_valid(page));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -807,8 +807,11 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
|
||||||
page_candidate = page;
|
page_candidate = page;
|
||||||
candidate_count = 0;
|
candidate_count = 0;
|
||||||
}
|
}
|
||||||
else if (/* !mi_page_is_expandable(page) && */ page->used >= page_candidate->used) {
|
else if (mi_page_all_free(page_candidate)) {
|
||||||
if (mi_page_all_free(page_candidate)) { _mi_page_free(page_candidate, pq); }
|
_mi_page_free(page_candidate, pq);
|
||||||
|
page_candidate = page;
|
||||||
|
}
|
||||||
|
else if (page->used >= page_candidate->used) {
|
||||||
page_candidate = page;
|
page_candidate = page;
|
||||||
}
|
}
|
||||||
// if we find a non-expandable candidate, or searched for N pages, return with the best candidate
|
// if we find a non-expandable candidate, or searched for N pages, return with the best candidate
|
||||||
|
|
|
@ -46,7 +46,7 @@ static int SCALE = 100;
|
||||||
static int ITER = 50;
|
static int ITER = 50;
|
||||||
#else
|
#else
|
||||||
static int THREADS = 32; // more repeatable if THREADS <= #processors
|
static int THREADS = 32; // more repeatable if THREADS <= #processors
|
||||||
static int SCALE = 25; // scaling factor
|
static int SCALE = 50; // scaling factor
|
||||||
static int ITER = 50; // N full iterations destructing and re-creating all threads
|
static int ITER = 50; // N full iterations destructing and re-creating all threads
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -54,7 +54,7 @@ static int ITER = 50; // N full iterations destructing and re-creating a
|
||||||
|
|
||||||
#define STRESS // undefine for leak test
|
#define STRESS // undefine for leak test
|
||||||
|
|
||||||
static bool allow_large_objects = true; // allow very large objects? (set to `true` if SCALE>100)
|
static bool allow_large_objects = false; // allow very large objects? (set to `true` if SCALE>100)
|
||||||
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
|
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
|
||||||
|
|
||||||
static bool main_participates = false; // main thread participates as a worker too
|
static bool main_participates = false; // main thread participates as a worker too
|
||||||
|
|
Loading…
Add table
Reference in a new issue