consolidate bit scan operations

This commit is contained in:
daan 2020-09-08 09:27:57 -07:00
parent c7272afa9a
commit 30b993ecf3
4 changed files with 113 additions and 96 deletions

View file

@ -107,7 +107,6 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback fro
size_t _mi_bin_size(uint8_t bin); // for stats
uint8_t _mi_bin(size_t size); // for stats
uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD in "os.c"
// "heap.c"
void _mi_heap_destroy_pages(mi_heap_t* heap);
@ -744,5 +743,107 @@ static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
}
#endif
// -----------------------------------------------------------------------
// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
// -----------------------------------------------------------------------
#if defined(__GNUC__)
#include <limits.h> // LONG_MAX
#define MI_HAVE_FAST_BITSCAN
// Number of leading zero bits in `x`; an all-zero `x` yields MI_INTPTR_BITS.
static inline size_t mi_clz(uintptr_t x) {
  if (x != 0) {
    // use the `long` builtin when INTPTR_MAX equals LONG_MAX, else the `long long` one
#if (INTPTR_MAX == LONG_MAX)
    return __builtin_clzl(x);
#else
    return __builtin_clzll(x);
#endif
  }
  // zero input: report the full word width instead of calling the builtin
  return MI_INTPTR_BITS;
}
// Number of trailing zero bits in `x`; an all-zero `x` yields MI_INTPTR_BITS.
static inline size_t mi_ctz(uintptr_t x) {
  if (x != 0) {
    // use the `long` builtin when INTPTR_MAX equals LONG_MAX, else the `long long` one
#if (INTPTR_MAX == LONG_MAX)
    return __builtin_ctzl(x);
#else
    return __builtin_ctzll(x);
#endif
  }
  // zero input: report the full word width instead of calling the builtin
  return MI_INTPTR_BITS;
}
#elif defined(_MSC_VER)
#define MI_HAVE_FAST_BITSCAN
// Number of leading zero bits in `x` (MSVC intrinsic version).
// Returns MI_INTPTR_BITS when `x` is zero.
static inline size_t mi_clz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;
  unsigned long idx;
  // Select the intrinsic from the pointer width itself. Testing `LONG_MAX` here
  // would depend on <limits.h> having been included: an undefined macro
  // evaluates to 0 inside `#if` and would silently select the 64-bit intrinsic.
#if (MI_INTPTR_BITS <= 32)
  _BitScanReverse(&idx, x);
#else
  _BitScanReverse64(&idx, x);
#endif
  // `idx` is the position of the highest set bit; convert it to a leading-zero count
  return ((MI_INTPTR_BITS - 1) - idx);
}
// Number of trailing zero bits in `x` (MSVC intrinsic version).
// Returns MI_INTPTR_BITS when `x` is zero.
static inline size_t mi_ctz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;
  unsigned long idx;
  // Select the intrinsic from the pointer width itself. Testing `LONG_MAX` here
  // would depend on <limits.h> having been included: an undefined macro
  // evaluates to 0 inside `#if` and would silently select the 64-bit intrinsic.
#if (MI_INTPTR_BITS <= 32)
  _BitScanForward(&idx, x);
#else
  _BitScanForward64(&idx, x);
#endif
  // the position of the lowest set bit equals the number of trailing zeros
  return idx;
}
#else
// Number of trailing zero bits in a 32-bit value; returns 32 when `x` is zero.
static inline size_t mi_ctz32(uint32_t x) {
  // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
  static const unsigned char debruijn[32] = {
    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
  };
  if (x==0) return 32;
  // Isolate the lowest set bit with unsigned arithmetic: negating through
  // `int32_t` overflows for x == 0x80000000.
  const uint32_t lowest = x & (uint32_t)(0U - x);
  // The product must wrap at 32 bits before taking the top 5 bits. Where
  // `unsigned long` is 64 bits wide the untruncated product would index
  // far outside the 32-entry table.
  return debruijn[(uint32_t)(lowest * 0x077CB531UL) >> 27];
}
// Number of leading zero bits in a 32-bit value; returns 32 when `x` is zero.
static inline size_t mi_clz32(uint32_t x) {
  // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
  // Lookup table: maps the top 5 bits of the hashed value to a leading-zero count.
  static const uint8_t leading_lut[32] = {
    31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
    23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
  };
  if (x == 0) {
    return 32;
  }
  // propagate the highest set bit into every lower position
  for (unsigned step = 1; step <= 16; step *= 2) {
    x |= (x >> step);
  }
  // keep only the low 32 bits of the product, then use its top 5 bits as the index
  const uint32_t hashed = (uint32_t)(x * 0x07C4ACDDUL);
  return leading_lut[hashed >> 27];
}
// Number of leading zero bits in `x`, built on the portable 32-bit helper.
// Returns MI_INTPTR_BITS when `x` is zero.
static inline size_t mi_clz(uintptr_t x) {
  if (x == 0) {
    return MI_INTPTR_BITS;
  }
#if (MI_INTPTR_BITS <= 32)
  return mi_clz32((uint32_t)x);
#else
  // scan the upper half first; only when it is entirely zero continue into the lower half
  const size_t upper_zeros = mi_clz32((uint32_t)(x >> 32));
  return (upper_zeros < 32 ? upper_zeros : 32 + mi_clz32((uint32_t)x));
#endif
}
// Number of trailing zero bits in `x`, built on the portable 32-bit helper.
// Returns MI_INTPTR_BITS when `x` is zero.
static inline size_t mi_ctz(uintptr_t x) {
  if (x == 0) {
    return MI_INTPTR_BITS;
  }
#if (MI_INTPTR_BITS <= 32)
  return mi_ctz32((uint32_t)x);
#else
  // scan the lower half first; only when it is entirely zero continue into the upper half
  const size_t lower_zeros = mi_ctz32((uint32_t)x);
  return (lower_zeros < 32 ? lower_zeros : 32 + mi_ctz32((uint32_t)(x>>32)));
#endif
}
#endif
// Bit scan reverse: index of the most significant set bit of `x`.
// A zero `x` has no set bit and yields MI_INTPTR_BITS.
static inline size_t mi_bsr(uintptr_t x) {
  if (x == 0) {
    return MI_INTPTR_BITS;
  }
  // the highest bit index is the top bit position minus the leading-zero count
  return MI_INTPTR_BITS - 1 - mi_clz(x);
}
#endif

View file

@ -68,47 +68,6 @@ static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
}
/* -----------------------------------------------------------
Use bit scan forward/reverse to quickly find the first zero bit if it is available
----------------------------------------------------------- */
#if defined(_MSC_VER)
#define MI_HAVE_BITSCAN
#include <intrin.h>
#ifndef MI_64
#if MI_INTPTR_SIZE==8
#define MI_64(f) f##64
#else
#define MI_64(f) f
#endif
#endif
// Bit scan forward: index of the lowest set bit of `x`,
// or the word width in bits (8*MI_INTPTR_SIZE) when `x` is zero.
static inline size_t mi_bsf(uintptr_t x) {
  DWORD lowest;
  if (x != 0) {
    // MI_64 appends the `64` suffix to the intrinsic name when MI_INTPTR_SIZE is 8
    MI_64(_BitScanForward)(&lowest, x);
    return lowest;
  }
  return 8*MI_INTPTR_SIZE;
}
// Bit scan reverse: index of the highest set bit of `x`,
// or the word width in bits (8*MI_INTPTR_SIZE) when `x` is zero.
static inline size_t mi_bsr(uintptr_t x) {
  DWORD highest;
  if (x != 0) {
    // MI_64 appends the `64` suffix to the intrinsic name when MI_INTPTR_SIZE is 8
    MI_64(_BitScanReverse)(&highest, x);
    return highest;
  }
  return 8*MI_INTPTR_SIZE;
}
#elif defined(__GNUC__) || defined(__clang__)
#include <limits.h> // LONG_MAX
#define MI_HAVE_BITSCAN
#if (INTPTR_MAX == LONG_MAX)
# define MI_L(x) x##l
#else
# define MI_L(x) x##ll
#endif
// Bit scan forward: index of the lowest set bit of `x`,
// or the word width in bits (8*MI_INTPTR_SIZE) when `x` is zero.
static inline size_t mi_bsf(uintptr_t x) {
  if (x == 0) {
    return 8*MI_INTPTR_SIZE;
  }
  // MI_L appends `l` or `ll` to the builtin name
  return MI_L(__builtin_ctz)(x);
}
// Bit scan reverse: index of the highest set bit of `x`,
// or the word width in bits (8*MI_INTPTR_SIZE) when `x` is zero.
static inline size_t mi_bsr(uintptr_t x) {
  if (x == 0) {
    return 8*MI_INTPTR_SIZE;
  }
  // top bit position minus the leading-zero count; MI_L appends `l` or `ll` to the builtin name
  return (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x);
}
#endif
/* -----------------------------------------------------------
Claim a bit sequence atomically
@ -148,8 +107,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
const uintptr_t mask = mi_bitmap_mask_(count, 0);
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
#ifdef MI_HAVE_BITSCAN
size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
#ifdef MI_HAVE_FAST_BITSCAN
size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible
#else
size_t bitidx = 0; // otherwise start at 0
#endif
@ -157,7 +116,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
// scan linearly for a free range of zero bits
while (bitidx <= bitidx_max) {
if ((map & m) == 0) { // are the mask bits free at bitidx?
const uintptr_t mapm = map & m;
if (mapm == 0) { // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
const uintptr_t newmap = map | m;
mi_assert_internal((newmap^map) >> bitidx == mask);
@ -173,8 +133,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
}
else {
// on to the next bit range
#ifdef MI_HAVE_BITSCAN
const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
#ifdef MI_HAVE_FAST_BITSCAN
const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1);
mi_assert_internal(shift > 0 && shift <= count);
#else
const size_t shift = 1;
@ -270,8 +230,8 @@ static inline bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, siz
// check initial trailing zeros
_Atomic(uintptr_t)* field = &bitmap[idx];
uintptr_t map = mi_atomic_load_relaxed(field);
const uintptr_t bitidx = (map==0 ? 0 : mi_bsr(map) + 1);
const size_t initial = MI_BITMAP_FIELD_BITS - bitidx; // count of initial zeros starting at idx
const size_t initial = mi_clz(map); // count of initial zeros starting at idx
mi_assert_internal(initial >= 0 && initial <= MI_BITMAP_FIELD_BITS);
if (initial == 0) return false;
if (initial >= count) return mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields
if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
@ -321,7 +281,7 @@ static inline bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, siz
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
// claimed!
*bitmap_idx = mi_bitmap_index_create(idx, bitidx);
*bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial);
return true;
rollback:

View file

@ -358,7 +358,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
int fd = -1;
#if defined(MAP_ALIGNED) // BSD
if (try_alignment > 0) {
size_t n = _mi_bsr(try_alignment);
size_t n = mi_bsr(try_alignment);
if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB
flags |= MAP_ALIGNED(n);
}

View file

@ -49,50 +49,6 @@ static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) {
Bins
----------------------------------------------------------- */
// Bit scan reverse: return the index of the highest bit.
static inline uint8_t mi_bsr32(uint32_t x);
#if defined(_MSC_VER)
#include <intrin.h>
// Index of the highest set bit of a 32-bit value (MSVC intrinsic version).
// There is no check for x == 0 here; the result is only meaningful for non-zero input.
static inline uint8_t mi_bsr32(uint32_t x) {
  // `_BitScanReverse` writes through an `unsigned long*`. Use a local of exactly
  // that type rather than casting the address of a `uint32_t`, which accesses
  // the object through an incompatible pointer type.
  unsigned long idx;
  _BitScanReverse(&idx, x);
  return (uint8_t)idx;
}
#elif defined(__GNUC__) || defined(__clang__)
// Index of the highest set bit of a 32-bit value, via the compiler's count-leading-zeros builtin.
// There is no check for x == 0 here.
static inline uint8_t mi_bsr32(uint32_t x) {
  const int leading_zeros = __builtin_clz(x);
  // bit 31 is the top position; subtract the zeros above the highest set bit
  return (uint8_t)(31 - leading_zeros);
}
#else
// Index of the highest set bit of a 32-bit value, computed without compiler intrinsics.
// Note: for x == 0 the lookup lands on entry 0 of the table, so 31 is returned.
static inline uint8_t mi_bsr32(uint32_t x) {
  // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
  static const uint8_t highbit_lut[32] = {
    31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12,
    30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13
  };
  // propagate the highest set bit into every lower position
  for (unsigned step = 1; step <= 16; step *= 2) {
    x |= (x >> step);
  }
  // adding one leaves a single bit just above the original highest bit
  // (the value wraps to 0 when bit 31 was set)
  const uint32_t above = x + 1;
  // the top 5 bits of the 32-bit product select the table entry
  return highbit_lut[(above * 0x076be629) >> 27];
}
#endif
// Bit scan reverse over a full machine word: index of the highest set bit of `x`.
// Returns 0 when `x` is zero.
uint8_t _mi_bsr(uintptr_t x) {
  if (x == 0) {
    return 0;
  }
#if MI_INTPTR_SIZE==8
  // look at the upper 32 bits first; fall back to the lower half when they are all zero
  const uint32_t upper = (x >> 32);
  if (upper != 0) {
    return 32 + mi_bsr32(upper);
  }
  return mi_bsr32((uint32_t)x);
#elif MI_INTPTR_SIZE==4
  return mi_bsr32(x);
#else
# error "define bsr for non-32 or 64-bit platforms"
#endif
}
// Return the bin for a given field size.
// Returns MI_BIN_HUGE if the size is too large.
// We use `wsize` for the size in "machine word sizes",
@ -125,7 +81,7 @@ extern inline uint8_t _mi_bin(size_t size) {
#endif
wsize--;
// find the highest bit
uint8_t b = mi_bsr32((uint32_t)wsize);
uint8_t b = (uint8_t)mi_bsr(wsize); // note: wsize != 0
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we do not round the first 8 sizes
// which each get an exact bin