From f8d04dc2bc42efcae8f6012f2ccdef8c3056801c Mon Sep 17 00:00:00 2001
From: daanx
Date: Sat, 30 Nov 2024 12:41:11 -0800
Subject: [PATCH] compile with clang and gcc

---
 CMakeLists.txt              |  2 --
 include/mimalloc/bits.h     |  4 +--
 include/mimalloc/internal.h |  2 +-
 include/mimalloc/types.h    |  2 +-
 src/alloc-aligned.c         |  6 ++--
 src/arena.c                 | 12 ++++----
 src/bitmap.c                | 56 ++++++++++++++++++-------------------
 src/bitmap.h                |  2 +-
 src/heap.c                  |  6 ++--
 src/init.c                  |  7 +++--
 src/os.c                    |  2 +-
 src/page-map.c              |  2 +-
 src/page.c                  | 10 +++----
 src/prim/unix/prim.c        |  2 +-
 src/static.c                |  2 --
 15 files changed, 57 insertions(+), 60 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5cb05840..04b09252 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,8 +57,6 @@ set(mi_sources
     src/page.c
     src/page-map.c
    src/random.c
-    src/segment.c
-    src/segment-map.c
     src/stats.c
     src/prim/prim.c)

diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h
index d6695a00..79034c2f 100644
--- a/include/mimalloc/bits.h
+++ b/include/mimalloc/bits.h
@@ -291,7 +291,7 @@ static inline size_t mi_rotr(size_t x, size_t r) {
   // The term `(-rshift)&(MI_BFIELD_BITS-1)` is written instead of `MI_BFIELD_BITS - rshift` to
   // avoid UB when `rshift==0`. See
   const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
-  return (x >> rshift) | (x << ((-rshift) & (MI_SIZE_BITS-1)));
+  return ((x >> rshift) | (x << ((-rshift) & (MI_SIZE_BITS-1))));
   #endif
 }

@@ -310,7 +310,7 @@ static inline size_t mi_rotl(size_t x, size_t r) {
   // The term `(-rshift)&(MI_BFIELD_BITS-1)` is written instead of `MI_BFIELD_BITS - rshift` to
   // avoid UB when `rshift==0`. See
   const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
-  return (x << rshift) | (x >> ((-rshift) & (MI_SIZE_BITS-1)))
+  return ((x << rshift) | (x >> ((-rshift) & (MI_SIZE_BITS-1))));
   #endif
 }

diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 082882bb..1c1ec2bc 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -471,7 +471,7 @@ static inline uint8_t* mi_page_area(const mi_page_t* page, size_t* size) {
 static inline bool mi_page_contains_address(const mi_page_t* page, const void* p) {
   size_t psize;
   uint8_t* start = mi_page_area(page, &psize);
-  return (start <= p && p < start + psize);
+  return (start <= (uint8_t*)p && (uint8_t*)p < start + psize);
 }

 static inline bool mi_page_is_in_arena(const mi_page_t* page) {
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index ac0a5fc4..cc8deeb6 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -125,7 +125,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_ARENA_SLICE_SIZE   (MI_ZU(1) << MI_ARENA_SLICE_SHIFT)
 #define MI_ARENA_SLICE_ALIGN  (MI_ARENA_SLICE_SIZE)

-#define MI_BITMAP_CHUNK_BITS  (MI_ZU(1) << MI_BITMAP_CHUNK_BITS_SHIFT)
+#define MI_BITMAP_CHUNK_BITS  (1 << MI_BITMAP_CHUNK_BITS_SHIFT)

 #define MI_ARENA_MIN_OBJ_BLOCKS  (1)
 #define MI_ARENA_MAX_OBJ_BLOCKS  (MI_BITMAP_CHUNK_BITS)  // for now, cannot cross chunk boundaries
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 43dc2d36..84f49ec6 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -59,9 +59,9 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
   void* p;
   size_t oversize;
   if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) {
-    // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
-    // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the
-    // first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down)
+    // use OS allocation for very large alignment and allocate inside a huge page (not in an arena)
+    // This can support alignments >= MI_PAGE_ALIGN by ensuring the object can be aligned at a point in the
+    // first (and single) page such that the page info is `MI_ARENA_SLICE_SIZE` bytes before it (and can be found in the _mi_page_map).
     if mi_unlikely(offset != 0) {
       // todo: cannot support offset alignment for very large alignments yet
       #if MI_DEBUG > 0
diff --git a/src/arena.c b/src/arena.c
index 7b5256b6..b59f8ad3 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -652,7 +652,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_
 static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats);

 void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) {
-  mi_assert_internal(size >= 0 && stats != NULL);
+  mi_assert_internal(size > 0 && stats != NULL);
   mi_assert_internal(committed_size <= size);
   if (p==NULL) return;
   if (size==0) return;
@@ -675,8 +675,8 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
     size_t slice_index;
     mi_arena_t* arena = mi_arena_from_memid(memid, &slice_index, &slice_count);
     mi_assert_internal(size==1);
-    mi_assert_internal(mi_arena_slice_start(arena,slice_index) <= p);
-    mi_assert_internal(mi_arena_slice_start(arena,slice_index) + mi_size_of_slices(slice_count) > p);
+    mi_assert_internal(mi_arena_slice_start(arena,slice_index) <= (uint8_t*)p);
+    mi_assert_internal(mi_arena_slice_start(arena,slice_index) + mi_size_of_slices(slice_count) > (uint8_t*)p);
     // checks
     if (arena == NULL) {
       _mi_error_message(EINVAL, "trying to free from an invalid arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
@@ -796,7 +796,7 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t*

 static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept
 {
-  mi_assert(!is_large || memid.initially_committed && memid.is_pinned);
+  mi_assert(!is_large || (memid.initially_committed && memid.is_pinned));
   mi_assert(_mi_is_aligned(start,MI_ARENA_SLICE_SIZE));
   mi_assert(start!=NULL);
   if (start==NULL) return false;
@@ -849,7 +849,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   mi_bitmap_init(&arena->slices_committed,true);
   mi_bitmap_init(&arena->slices_dirty,true);
   mi_bitmap_init(&arena->slices_purge,true);
-  for( int i = 0; i < MI_ARENA_BIN_COUNT; i++) {
+  for( size_t i = 0; i < MI_ARENA_BIN_COUNT; i++) {
     mi_bitmap_init(&arena->slices_abandoned[i],true);
   }

@@ -924,7 +924,7 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
   for (int i = 0; i < MI_BFIELD_BITS && bit_count < slice_count; i++) {
     char buf[MI_BITMAP_CHUNK_BITS + 32]; _mi_memzero(buf, sizeof(buf));
     mi_bitmap_chunk_t* chunk = &bitmap->chunks[i];
-    for (int j = 0, k = 0; j < MI_BITMAP_CHUNK_FIELDS; j++) {
+    for (size_t j = 0, k = 0; j < MI_BITMAP_CHUNK_FIELDS; j++) {
       if (bit_count < slice_count) {
         mi_bfield_t bfield = chunk->bfields[j];
         if (invert) bfield = ~bfield;
diff --git a/src/bitmap.c b/src/bitmap.c
index bb54af6b..fe44bb67 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -22,9 +22,9 @@ static inline size_t mi_bfield_ctz(mi_bfield_t x) {
   return mi_ctz(x);
 }

-static inline size_t mi_bfield_clz(mi_bfield_t x) {
-  return mi_clz(x);
-}
+//static inline size_t mi_bfield_clz(mi_bfield_t x) {
+//  return mi_clz(x);
+//}

 // find the least significant bit that is set (i.e. count trailing zero's)
 // return false if `x==0` (with `*idx` undefined) and true otherwise,
@@ -124,11 +124,11 @@ static bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b,
 }

 // Check if a bit is set/clear
-static inline bool mi_bfield_atomic_is_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
-  mi_assert_internal(idx < MI_BFIELD_BITS);
-  const mi_bfield_t mask = ((mi_bfield_t)1)<bfields[chunk_idx], &cidx)) { // find the bit-idx that is set/clear
@@ -302,9 +302,9 @@ static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk,
   return mi_bitmap_chunk_find_and_try_xset(MI_BIT_CLEAR, chunk, pidx);
 }

-static inline bool mi_bitmap_chunk_find_and_try_set(mi_bitmap_chunk_t* chunk, size_t* pidx) {
-  return mi_bitmap_chunk_find_and_try_xset(MI_BIT_SET, chunk, pidx);
-}
+// static inline bool mi_bitmap_chunk_find_and_try_set(mi_bitmap_chunk_t* chunk, size_t* pidx) {
+//   return mi_bitmap_chunk_find_and_try_xset(MI_BIT_SET, chunk, pidx);
+// }

 /*
 // find least 1-bit in a chunk and try unset it atomically
@@ -435,19 +435,19 @@ static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk,


 // are all bits in a bitmap chunk set?
-static inline bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
-  #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
-  const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
-  return _mm256_test_all_ones(vec);
-  #else
-  // written like this for vectorization
-  mi_bfield_t x = chunk->bfields[0];
-  for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
-    x = x & chunk->bfields[i];
-  }
-  return (~x == 0);
-  #endif
-}
+// static inline bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
+//   #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
+//   const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
+//   return _mm256_test_all_ones(vec);
+//   #else
+//   // written like this for vectorization
+//   mi_bfield_t x = chunk->bfields[0];
+//   for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
+//     x = x & chunk->bfields[i];
+//   }
+//   return (~x == 0);
+//   #endif
+// }

 // are all bits in a bitmap chunk clear?
 static bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
@@ -594,11 +594,11 @@ bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n)
   mi_assert_internal(n>0);
   mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
   mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
-  
+
   const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
   const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
   mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now)
-  mi_assert_internal(chunk_idx < MI_BFIELD_BITS); 
+  mi_assert_internal(chunk_idx < MI_BFIELD_BITS);
   if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia
   return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
 }
diff --git a/src/bitmap.h b/src/bitmap.h
index fcadc213..1a180924 100644
--- a/src/bitmap.h
+++ b/src/bitmap.h
@@ -22,7 +22,7 @@ typedef size_t mi_bfield_t;
 #define MI_BFIELD_BITS          (1 << MI_BFIELD_BITS_SHIFT)
 #define MI_BFIELD_SIZE          (MI_BFIELD_BITS/8)
 #define MI_BFIELD_BITS_MOD_MASK (MI_BFIELD_BITS - 1)
-#define MI_BFIELD_LO_BIT8       ((~(mi_bfield_t(0)))/0xFF)   // 0x01010101 ..
+#define MI_BFIELD_LO_BIT8       (((~(mi_bfield_t)0))/0xFF)   // 0x01010101 ..
 #define MI_BFIELD_HI_BIT8       (MI_BFIELD_LO_BIT8 << 7)     // 0x80808080 ..

 #define MI_BITMAP_CHUNK_FIELDS  (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
diff --git a/src/heap.c b/src/heap.c
index 8ee66055..4da3b449 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -31,7 +31,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void
   size_t count = 0;
   #endif

-  for (size_t i = 0; i <= MI_BIN_FULL; i++) {
+  for (int i = 0; i <= MI_BIN_FULL; i++) {
     mi_page_queue_t* pq = &heap->pages[i];
     mi_page_t* page = pq->first;
     while(page != NULL) {
@@ -54,7 +54,7 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
   MI_UNUSED(arg1);
   MI_UNUSED(arg2);
   MI_UNUSED(pq);
-  mi_assert_internal(mi_page_heap(page) == heap); 
+  mi_assert_internal(mi_page_heap(page) == heap);
   mi_assert_expensive(_mi_page_is_valid(page));
   return true;
 }
@@ -419,7 +419,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
   // so threads may do delayed frees in either heap for a while.
   // note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state
   // so after this only the new heap will get delayed frees
-  for (size_t i = 0; i <= MI_BIN_FULL; i++) {
+  for (int i = 0; i <= MI_BIN_FULL; i++) {
     mi_page_queue_t* pq = &heap->pages[i];
     mi_page_queue_t* append = &from->pages[i];
     size_t pcount = _mi_page_queue_append(heap, pq, append);
diff --git a/src/init.c b/src/init.c
index 40bc5c4a..1456cb4a 100644
--- a/src/init.c
+++ b/src/init.c
@@ -33,7 +33,7 @@ const mi_page_t _mi_page_empty = {
   MI_ATOMIC_VAR_INIT(0), // xheap
   MI_ATOMIC_VAR_INIT(0), // xthread_id
   NULL, NULL,            // next, prev
-  { { NULL, 0}, false, false, false, MI_MEM_NONE }    // memid
+  { {{ NULL, 0}}, false, false, false, MI_MEM_NONE }  // memid
 };

 #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
@@ -396,7 +396,8 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
   tld->heap_backing = bheap;
   tld->heaps = NULL;
   tld->subproc = &mi_subproc_default;
-  tld->tseq = 0; // mi_atomic_add_acq_rel(&mi_tcount, 1);
+  tld->tseq = 0;
+  mi_atomic_add_acq_rel(&mi_tcount, 1);
   tld->os.stats = &tld->stats;
 }

@@ -433,7 +434,7 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) {
     _mi_stats_done(&heap->tld->stats);

     // free if not the main thread
-    if (heap != &_mi_heap_main) { 
+    if (heap != &_mi_heap_main) {
       mi_thread_data_free((mi_thread_data_t*)heap);
     }
     else {
diff --git a/src/os.c b/src/os.c
index da41d152..110d7ec6 100644
--- a/src/os.c
+++ b/src/os.c
@@ -573,7 +573,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
 }
 #endif

-// Allocate MI_SEGMENT_SIZE aligned huge pages
+// Allocate MI_ARENA_SLICE_ALIGN aligned huge pages
 void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) {
   *memid = _mi_memid_none();
   if (psize != NULL) *psize = 0;
diff --git a/src/page-map.c b/src/page-map.c
index c7d5e8b4..07433aa3 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -107,4 +107,4 @@ mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_att
   else {
     return false;
   }
-}
\ No newline at end of file
+}
diff --git a/src/page.c b/src/page.c
index f8ef641e..d91b9123 100644
--- a/src/page.c
+++ b/src/page.c
@@ -250,13 +250,13 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {

 // called from segments when reclaiming abandoned pages
 void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
   mi_page_set_heap(page, heap);
-  _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) 
+  _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
   _mi_page_free_collect(page, false); // ensure used count is up to date
   mi_assert_expensive(mi_page_is_valid_init(page));
   mi_assert_internal(mi_page_heap(page) == heap);
   mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
-  
+
   // TODO: push on full queue immediately if it is full?
   mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
   mi_page_queue_push(heap, pq, page);
@@ -686,7 +686,7 @@ void _mi_page_init(mi_heap_t* heap, mi_page_t* page) {
     mi_assert_expensive(mi_mem_is_zero(page_start, page_size));
   }
   #endif
-  
+
   mi_assert_internal(page->capacity == 0);
   mi_assert_internal(page->free == NULL);
   mi_assert_internal(page->used == 0);
@@ -928,8 +928,8 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme

 // Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
 // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
-// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for
-// very large requested alignments in which case we use a huge segment.
+// The `huge_alignment` is normally 0 but is set to a multiple of MI_SLICE_SIZE for
+// very large requested alignments in which case we use a huge singleton page.
 void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept
 {
   mi_assert_internal(heap != NULL);
diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c
index 59421e52..5a4440c3 100644
--- a/src/prim/unix/prim.c
+++ b/src/prim/unix/prim.c
@@ -478,7 +478,7 @@ static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, co
 int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
   bool is_large = true;
   *is_zero = true;
-  *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
+  *addr = unix_mmap(hint_addr, size, MI_ARENA_SLICE_ALIGN, PROT_READ | PROT_WRITE, true, true, &is_large);
   if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
     unsigned long numa_mask = (1UL << numa_node);
     // TODO: does `mbind` work correctly for huge OS pages? should we
diff --git a/src/static.c b/src/static.c
index b34d5d42..0a8fa447 100644
--- a/src/static.c
+++ b/src/static.c
@@ -33,8 +33,6 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "page.c" // includes page-queue.c
 #include "page-map.c"
 #include "random.c"
-#include "segment.c"
-#include "segment-map.c"
 #include "stats.c"
 #include "prim/prim.c"
 #if MI_OSX_ZONE
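The `mi_rotr`/`mi_rotl` hunks in `include/mimalloc/bits.h` above fix a missing semicolon and parenthesize the rotate expressions; the surrounding comment notes that the shift amount is written as `(-rshift) & (MI_SIZE_BITS-1)` rather than `MI_SIZE_BITS - rshift` so that `rshift == 0` never produces an undefined shift by the full word width. Below is a minimal standalone sketch of that idiom; the `rotr64`/`rotl64` helpers and the test values are illustrative only and are not part of mimalloc.

#include <stdint.h>
#include <stdio.h>

// Rotate right without undefined behavior: for r == 0 the left-shift count is
// (-0) & 63 == 0, so we never evaluate the undefined `x << 64`.
// Unsigned negation is well-defined (modular), so (-s) & 63 == (64 - s) % 64.
static inline uint64_t rotr64(uint64_t x, unsigned r) {
  const unsigned s = r & 63u;
  return (x >> s) | (x << ((-s) & 63u));
}

// Rotate left, using the same trick for the right-shift count.
static inline uint64_t rotl64(uint64_t x, unsigned r) {
  const unsigned s = r & 63u;
  return (x << s) | (x >> ((-s) & 63u));
}

int main(void) {
  const uint64_t v = 0x0123456789abcdefULL;
  printf("%016llx\n", (unsigned long long)rotr64(v, 0));               // unchanged: rotate by 0 is the identity
  printf("%016llx\n", (unsigned long long)rotr64(v, 8));               // 0xef0123456789abcd: low byte wraps to the top
  printf("%016llx\n", (unsigned long long)rotl64(rotr64(v, 13), 13));  // round-trips back to v
  return 0;
}

Compiling a sketch like this with `clang -Wall -Wextra` or `gcc -Wall -Wextra` stays warning-free, which is the point of the masked shift count and of the extra parentheses the patch adds around the `|` of the two shifts.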