diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index d0a77c5f..d507ca69 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -299,9 +299,9 @@ typedef struct mi_page_s {
   mi_block_t* local_free;                  // list of deferred free blocks by this thread (migrates to `free`)
   _Atomic(mi_thread_free_t) xthread_free;  // list of deferred free blocks freed by other threads
-  _Atomic(mi_page_flags_t) xflags;         // `in_full` and `has_aligned` flags 
+  _Atomic(mi_page_flags_t) xflags;         // `in_full` and `has_aligned` flags
 
-  size_t block_size;                       // size available in each block (always `>0`) 
+  size_t block_size;                       // size available in each block (always `>0`)
   uint8_t* page_start;                     // start of the blocks
   mi_heaptag_t heap_tag;                   // tag of the owning heap, used to separate heaps by object type
   bool free_is_zero;                       // `true` if the blocks in the free list are zero initialized
@@ -439,7 +439,7 @@ struct mi_heap_s {
 // Arena's
 // These are large reserved areas of memory allocated from
 // the OS that are managed by mimalloc to efficiently
-// allocate MI_SLICE_SIZE slices of memory for the 
+// allocate MI_SLICE_SIZE slices of memory for the
 // mimalloc pages.
 // ------------------------------------------------------
 
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 9673334a..b1e6329c 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -38,7 +38,8 @@ static mi_decl_restrict void* mi_heap_malloc_guarded_aligned(mi_heap_t* heap, si
 
 static void* mi_heap_malloc_zero_no_guarded(mi_heap_t* heap, size_t size, bool zero) {
   const size_t rate = heap->guarded_sample_rate;
-  if (rate != 0) { heap->guarded_sample_rate = 0; }  // don't write to constant heap_empty
+  // only write if `rate!=0` so we don't write to the constant `_mi_heap_empty`
+  if (rate != 0) { heap->guarded_sample_rate = 0; }
   void* p = _mi_heap_malloc_zero(heap, size, zero);
   if (rate != 0) { heap->guarded_sample_rate = rate; }
   return p;
@@ -59,7 +60,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
   size_t oversize;
   if mi_unlikely(alignment > MI_PAGE_MAX_OVERALLOC_ALIGN) {
     // use OS allocation for large alignments and allocate inside a singleton page (not in an arena)
-    // This can support alignments >= MI_PAGE_ALIGN by ensuring the object can be aligned 
+    // This can support alignments >= MI_PAGE_ALIGN by ensuring the object can be aligned
     // in the first (and single) page such that the page info is `MI_PAGE_ALIGN` bytes before it (and can be found in the _mi_page_map).
     if mi_unlikely(offset != 0) {
       // todo: cannot support offset alignment for very large alignments yet
diff --git a/src/arena.c b/src/arena.c
index 2558165a..ab74b988 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -202,7 +202,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
 
   // set the dirty bits
   if (arena->memid.initially_zero) {
-    memid->initially_zero = mi_bitmap_setN(arena->slices_dirty, slice_index, slice_count, NULL); 
+    memid->initially_zero = mi_bitmap_setN(arena->slices_dirty, slice_index, slice_count, NULL);
   }
 
   // set commit state
@@ -584,7 +584,7 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz
   mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
   mi_assert_internal(!os_align || _mi_is_aligned((uint8_t*)page + page_alignment, block_alignment));
 
-  // claimed free slices: initialize the page partly 
+  // claimed free slices: initialize the page partly
   if (!memid.initially_zero) {
     mi_track_mem_undefined(page, slice_count * MI_ARENA_SLICE_SIZE);
     _mi_memzero_aligned(page, sizeof(*page));
diff --git a/src/bitmap.c b/src/bitmap.c
index b76dfc77..44113429 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -920,7 +920,7 @@ bool mi_bitmap_xset(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) {
   }
 }
 
-// Set/clear aligned 8-bits in the bitmap (with `(idx%8)==0`). 
+// Set/clear aligned 8-bits in the bitmap (with `(idx%8)==0`).
 // Returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
 static bool mi_bitmap_xset8(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) {
   mi_assert_internal(idx < mi_bitmap_max_bits(bitmap));
diff --git a/src/init.c b/src/init.c
index 7e3e5f86..2396f594 100644
--- a/src/init.c
+++ b/src/init.c
@@ -134,7 +134,7 @@ extern mi_heap_t _mi_heap_main;
 static mi_decl_cache_align mi_subproc_t mi_subproc_default;
 
 static mi_decl_cache_align mi_tld_t tld_main = {
-  0, 
+  0,
   &_mi_heap_main, &_mi_heap_main,
   &mi_subproc_default,  // subproc
   0,                    // tseq
@@ -241,7 +241,7 @@ mi_heap_t* _mi_heap_main_get(void) {
 // Thread sequence number
 static _Atomic(size_t) mi_tcount;
 
-// The mimalloc thread local data 
+// The mimalloc thread local data
 mi_decl_thread mi_tld_t* mi_tld;
 
 // Allocate fresh tld
diff --git a/test/main-override-static.c b/test/main-override-static.c
index a8e30f69..2e7f1aca 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -43,8 +43,7 @@ int main() {
   // test_heap_walk();
   // alloc_huge();
 
-  mi_bins();
-
+  // mi_bins();
 
   void* p1 = malloc(78);
   void* p2 = malloc(24);
@@ -281,7 +280,7 @@ static void test_large_pages(void) {
 // bin size experiments
 // ------------------------------
 
-#if 1
+#if 0
 #include <stdio.h>
 #include <stdint.h>
 #include <stdbool.h>
@@ -291,6 +290,51 @@ static void test_large_pages(void) {
 
 #define MI_BIN_HUGE 100
 //#define MI_ALIGN2W
+// Bit scan reverse: return the index of the highest bit.
+static inline uint8_t mi_bsr32(uint32_t x);
+
+#if defined(_MSC_VER)
+//#include <Windows.h>
+#include <intrin.h>
+static inline uint8_t mi_bsr32(uint32_t x) {
+  uint32_t idx;
+  _BitScanReverse(&idx, x);
+  return idx;
+}
+#elif defined(__GNUC__) || defined(__clang__)
+static inline uint8_t mi_bsr32(uint32_t x) {
+  return (31 - __builtin_clz(x));
+}
+#else
+static inline uint8_t mi_bsr32(uint32_t x) {
+  // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
+  static const uint8_t debruijn[32] = {
+    31,  0, 22,  1, 28, 23, 18,  2, 29, 26, 24, 10, 19,  7,  3, 12,
+    30, 21, 27, 17, 25,  9,  6, 11, 20, 16,  8,  5, 15,  4, 14, 13,
+  };
+  x |= x >> 1;
+  x |= x >> 2;
+  x |= x >> 4;
+  x |= x >> 8;
+  x |= x >> 16;
+  x++;
+  return debruijn[(x*0x076be629) >> 27];
+}
+#endif
+
+
+// Bit scan reverse: return the index of the highest bit.
+uint8_t _mi_bsr(uintptr_t x) {
+  if (x == 0) return 0;
+  #if MI_INTPTR_SIZE==8
+  uint32_t hi = (x >> 32);
+  return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi));
+  #elif MI_INTPTR_SIZE==4
+  return mi_bsr32(x);
+  #else
+  # error "define bsr for non-32 or 64-bit platforms"
+  #endif
+}
 
 static inline size_t _mi_wsize_from_size(size_t size) {
   return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
diff --git a/test/test-stress.c b/test/test-stress.c
index 915c953f..96cf702d 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -57,7 +57,7 @@ static int ITER = 10;
 #define ALLOW_LARGE   true
 #else
 static int THREADS = 32;  // more repeatable if THREADS <= #processors
-static int SCALE   = 25;  // scaling factor
+static int SCALE   = 50;  // scaling factor
 static int ITER    = 50;  // N full iterations destructing and re-creating all threads
 #endif
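
For reference, the following standalone program (not part of the patch) sanity-checks the de Bruijn fallback for `mi_bsr32` added in `test/main-override-static.c` against a naive shift loop. The file name `check_bsr.c` and the helper `bsr32_ref` are illustrative only; the table and multiplier are copied verbatim from the diff above.

// check_bsr.c (illustrative): verify the de Bruijn mi_bsr32 fallback
#include <stdint.h>
#include <stdio.h>

// copied from the patch: round x up to a power of two, then use a
// de Bruijn multiplication to look up the index of the highest set bit
static inline uint8_t mi_bsr32(uint32_t x) {
  static const uint8_t debruijn[32] = {
    31,  0, 22,  1, 28, 23, 18,  2, 29, 26, 24, 10, 19,  7,  3, 12,
    30, 21, 27, 17, 25,  9,  6, 11, 20, 16,  8,  5, 15,  4, 14, 13,
  };
  x |= x >> 1;
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;
  x++;   // now 2^(bsr+1); wraps to 0 when bit 31 was set, which maps to slot 0 (= 31)
  return debruijn[(x*0x076be629) >> 27];
}

// naive reference: shift right until the highest bit falls off (illustrative helper)
static uint8_t bsr32_ref(uint32_t x) {
  uint8_t i = 0;
  while (x >>= 1) { i++; }
  return i;
}

int main(void) {
  // boundaries are the interesting cases: each power of two and its neighbors
  for (int k = 0; k < 32; k++) {
    const uint32_t p = (uint32_t)1 << k;
    const uint32_t tests[3] = { p - 1, p, p + 1 };
    for (int j = 0; j < 3; j++) {
      const uint32_t x = tests[j];
      if (x != 0 && mi_bsr32(x) != bsr32_ref(x)) {
        printf("mismatch at 0x%08x: %d != %d\n", x, mi_bsr32(x), bsr32_ref(x));
        return 1;
      }
    }
  }
  printf("de Bruijn mi_bsr32 agrees with the reference on all boundaries\n");
  return 0;
}

The wrap-around case is why the table's slot 0 holds 31: for any input with bit 31 set, the or-folding yields 0xFFFFFFFF, the increment overflows to 0, and the multiplication indexes slot 0.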