mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-05-04 22:49:32 +03:00
consolidate bit scan operations
This commit is contained in:
parent
c7272afa9a
commit
30b993ecf3
4 changed files with 113 additions and 96 deletions
|
@ -107,7 +107,6 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback fro
|
|||
|
||||
size_t _mi_bin_size(uint8_t bin); // for stats
|
||||
uint8_t _mi_bin(size_t size); // for stats
|
||||
uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD in "os.c"
|
||||
|
||||
// "heap.c"
|
||||
void _mi_heap_destroy_pages(mi_heap_t* heap);
|
||||
|
@ -744,5 +743,107 @@ static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
|
|||
}
|
||||
#endif
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
#if defined(__GNUC__)
|
||||
|
||||
#include <limits.h> // LONG_MAX
|
||||
#define MI_HAVE_FAST_BITSCAN
|
||||
static inline size_t mi_clz(uintptr_t x) {
|
||||
if (x==0) return MI_INTPTR_BITS;
|
||||
#if (INTPTR_MAX == LONG_MAX)
|
||||
return __builtin_clzl(x);
|
||||
#else
|
||||
return __builtin_clzll(x);
|
||||
#endif
|
||||
}
|
||||
static inline size_t mi_ctz(uintptr_t x) {
|
||||
if (x==0) return MI_INTPTR_BITS;
|
||||
#if (INTPTR_MAX == LONG_MAX)
|
||||
return __builtin_ctzl(x);
|
||||
#else
|
||||
return __builtin_ctzll(x);
|
||||
#endif
|
||||
}
|
||||
|
||||
#elif defined(_MSC_VER)
|
||||
|
||||
#define MI_HAVE_FAST_BITSCAN
|
||||
static inline size_t mi_clz(uintptr_t x) {
|
||||
if (x==0) return MI_INTPTR_BITS;
|
||||
unsigned long idx;
|
||||
#if (INTPTR_MAX == LONG_MAX)
|
||||
_BitScanReverse(&idx, x);
|
||||
#else
|
||||
_BitScanReverse64(&idx, x);
|
||||
#endif
|
||||
return ((MI_INTPTR_BITS - 1) - idx);
|
||||
}
|
||||
static inline size_t mi_ctz(uintptr_t x) {
|
||||
if (x==0) return MI_INTPTR_BITS;
|
||||
unsigned long idx;
|
||||
#if (INTPTR_MAX == LONG_MAX)
|
||||
_BitScanForward(&idx, x);
|
||||
#else
|
||||
_BitScanForward64(&idx, x);
|
||||
#endif
|
||||
return idx;
|
||||
}
|
||||
|
||||
#else
|
||||
static inline size_t mi_ctz32(uint32_t x) {
|
||||
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
|
||||
static const unsigned char debruijn[32] = {
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
|
||||
};
|
||||
if (x==0) return 32;
|
||||
return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27];
|
||||
}
|
||||
static inline size_t mi_clz32(uint32_t x) {
|
||||
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
|
||||
static const uint8_t debruijn[32] = {
|
||||
31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
|
||||
23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
|
||||
};
|
||||
if (x==0) return 32;
|
||||
x |= x >> 1;
|
||||
x |= x >> 2;
|
||||
x |= x >> 4;
|
||||
x |= x >> 8;
|
||||
x |= x >> 16;
|
||||
return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27];
|
||||
}
|
||||
|
||||
static inline size_t mi_clz(uintptr_t x) {
|
||||
if (x==0) return MI_INTPTR_BITS;
|
||||
#if (MI_INTPTR_BITS <= 32)
|
||||
return mi_clz32((uint32_t)x);
|
||||
#else
|
||||
size_t count = mi_clz32((uint32_t)(x >> 32));
|
||||
if (count < 32) return count;
|
||||
return (32 + mi_clz32((uint32_t)x));
|
||||
#endif
|
||||
}
|
||||
static inline size_t mi_ctz(uintptr_t x) {
|
||||
if (x==0) return MI_INTPTR_BITS;
|
||||
#if (MI_INTPTR_BITS <= 32)
|
||||
return mi_ctz32((uint32_t)x);
|
||||
#else
|
||||
size_t count = mi_ctz32((uint32_t)x);
|
||||
if (count < 32) return count;
|
||||
return (32 + mi_ctz32((uint32_t)(x>>32)));
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero)
|
||||
static inline size_t mi_bsr(uintptr_t x) {
|
||||
return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x));
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -68,47 +68,6 @@ static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
|
|||
}
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Use bit scan forward/reverse to quickly find the first zero bit if it is available
|
||||
----------------------------------------------------------- */
|
||||
#if defined(_MSC_VER)
|
||||
#define MI_HAVE_BITSCAN
|
||||
#include <intrin.h>
|
||||
#ifndef MI_64
|
||||
#if MI_INTPTR_SIZE==8
|
||||
#define MI_64(f) f##64
|
||||
#else
|
||||
#define MI_64(f) f
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static inline size_t mi_bsf(uintptr_t x) {
|
||||
if (x==0) return 8*MI_INTPTR_SIZE;
|
||||
DWORD idx;
|
||||
MI_64(_BitScanForward)(&idx, x);
|
||||
return idx;
|
||||
}
|
||||
static inline size_t mi_bsr(uintptr_t x) {
|
||||
if (x==0) return 8*MI_INTPTR_SIZE;
|
||||
DWORD idx;
|
||||
MI_64(_BitScanReverse)(&idx, x);
|
||||
return idx;
|
||||
}
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
#include <limits.h> // LONG_MAX
|
||||
#define MI_HAVE_BITSCAN
|
||||
#if (INTPTR_MAX == LONG_MAX)
|
||||
# define MI_L(x) x##l
|
||||
#else
|
||||
# define MI_L(x) x##ll
|
||||
#endif
|
||||
static inline size_t mi_bsf(uintptr_t x) {
|
||||
return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x));
|
||||
}
|
||||
static inline size_t mi_bsr(uintptr_t x) {
|
||||
return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x));
|
||||
}
|
||||
#endif
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Claim a bit sequence atomically
|
||||
|
@ -148,8 +107,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
|
|||
const uintptr_t mask = mi_bitmap_mask_(count, 0);
|
||||
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
|
||||
|
||||
#ifdef MI_HAVE_BITSCAN
|
||||
size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
|
||||
#ifdef MI_HAVE_FAST_BITSCAN
|
||||
size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible
|
||||
#else
|
||||
size_t bitidx = 0; // otherwise start at 0
|
||||
#endif
|
||||
|
@ -157,7 +116,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
|
|||
|
||||
// scan linearly for a free range of zero bits
|
||||
while (bitidx <= bitidx_max) {
|
||||
if ((map & m) == 0) { // are the mask bits free at bitidx?
|
||||
const uintptr_t mapm = map & m;
|
||||
if (mapm == 0) { // are the mask bits free at bitidx?
|
||||
mi_assert_internal((m >> bitidx) == mask); // no overflow?
|
||||
const uintptr_t newmap = map | m;
|
||||
mi_assert_internal((newmap^map) >> bitidx == mask);
|
||||
|
@ -173,8 +133,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
|
|||
}
|
||||
else {
|
||||
// on to the next bit range
|
||||
#ifdef MI_HAVE_BITSCAN
|
||||
const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
|
||||
#ifdef MI_HAVE_FAST_BITSCAN
|
||||
const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1);
|
||||
mi_assert_internal(shift > 0 && shift <= count);
|
||||
#else
|
||||
const size_t shift = 1;
|
||||
|
@ -270,8 +230,8 @@ static inline bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, siz
|
|||
// check initial trailing zeros
|
||||
_Atomic(uintptr_t)* field = &bitmap[idx];
|
||||
uintptr_t map = mi_atomic_load_relaxed(field);
|
||||
const uintptr_t bitidx = (map==0 ? 0 : mi_bsr(map) + 1);
|
||||
const size_t initial = MI_BITMAP_FIELD_BITS - bitidx; // count of initial zeros starting at idx
|
||||
const size_t initial = mi_clz(map); // count of initial zeros starting at idx
|
||||
mi_assert_internal(initial >= 0 && initial <= MI_BITMAP_FIELD_BITS);
|
||||
if (initial == 0) return false;
|
||||
if (initial >= count) return mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields
|
||||
if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
|
||||
|
@ -321,7 +281,7 @@ static inline bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, siz
|
|||
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
|
||||
|
||||
// claimed!
|
||||
*bitmap_idx = mi_bitmap_index_create(idx, bitidx);
|
||||
*bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial);
|
||||
return true;
|
||||
|
||||
rollback:
|
||||
|
|
2
src/os.c
2
src/os.c
|
@ -358,7 +358,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
|
|||
int fd = -1;
|
||||
#if defined(MAP_ALIGNED) // BSD
|
||||
if (try_alignment > 0) {
|
||||
size_t n = _mi_bsr(try_alignment);
|
||||
size_t n = mi_bsr(try_alignment);
|
||||
if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB
|
||||
flags |= MAP_ALIGNED(n);
|
||||
}
|
||||
|
|
|
@ -49,50 +49,6 @@ static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) {
|
|||
Bins
|
||||
----------------------------------------------------------- */
|
||||
|
||||
// Bit scan reverse: return the index of the highest bit.
|
||||
static inline uint8_t mi_bsr32(uint32_t x);
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
static inline uint8_t mi_bsr32(uint32_t x) {
|
||||
uint32_t idx;
|
||||
_BitScanReverse((DWORD*)&idx, x);
|
||||
return (uint8_t)idx;
|
||||
}
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
static inline uint8_t mi_bsr32(uint32_t x) {
|
||||
return (31 - __builtin_clz(x));
|
||||
}
|
||||
#else
|
||||
static inline uint8_t mi_bsr32(uint32_t x) {
|
||||
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
|
||||
static const uint8_t debruijn[32] = {
|
||||
31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12,
|
||||
30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13,
|
||||
};
|
||||
x |= x >> 1;
|
||||
x |= x >> 2;
|
||||
x |= x >> 4;
|
||||
x |= x >> 8;
|
||||
x |= x >> 16;
|
||||
x++;
|
||||
return debruijn[(x*0x076be629) >> 27];
|
||||
}
|
||||
#endif
|
||||
|
||||
// Bit scan reverse: return the index of the highest bit.
|
||||
uint8_t _mi_bsr(uintptr_t x) {
|
||||
if (x == 0) return 0;
|
||||
#if MI_INTPTR_SIZE==8
|
||||
uint32_t hi = (x >> 32);
|
||||
return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi));
|
||||
#elif MI_INTPTR_SIZE==4
|
||||
return mi_bsr32(x);
|
||||
#else
|
||||
# error "define bsr for non-32 or 64-bit platforms"
|
||||
#endif
|
||||
}
|
||||
|
||||
// Return the bin for a given field size.
|
||||
// Returns MI_BIN_HUGE if the size is too large.
|
||||
// We use `wsize` for the size in "machine word sizes",
|
||||
|
@ -125,7 +81,7 @@ extern inline uint8_t _mi_bin(size_t size) {
|
|||
#endif
|
||||
wsize--;
|
||||
// find the highest bit
|
||||
uint8_t b = mi_bsr32((uint32_t)wsize);
|
||||
uint8_t b = (uint8_t)mi_bsr(wsize); // note: wsize != 0
|
||||
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
|
||||
// - adjust with 3 because we use do not round the first 8 sizes
|
||||
// which each get an exact bin
|
||||
|
|
Loading…
Add table
Reference in a new issue