consolidate bit scan operations

This commit is contained in:
daan 2020-09-08 09:27:57 -07:00
parent c7272afa9a
commit 30b993ecf3
4 changed files with 113 additions and 96 deletions

View file

@ -68,47 +68,6 @@ static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
}
/* -----------------------------------------------------------
Use bit scan forward/reverse to quickly find the first zero bit if it is available
----------------------------------------------------------- */
#if defined(_MSC_VER)
#define MI_HAVE_BITSCAN
#include <intrin.h>
#ifndef MI_64
#if MI_INTPTR_SIZE==8
#define MI_64(f) f##64
#else
#define MI_64(f) f
#endif
#endif
static inline size_t mi_bsf(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
MI_64(_BitScanForward)(&idx, x);
return idx;
}
static inline size_t mi_bsr(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
MI_64(_BitScanReverse)(&idx, x);
return idx;
}
#elif defined(__GNUC__) || defined(__clang__)
#include <limits.h> // LONG_MAX
#define MI_HAVE_BITSCAN
#if (INTPTR_MAX == LONG_MAX)
# define MI_L(x) x##l
#else
# define MI_L(x) x##ll
#endif
static inline size_t mi_bsf(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x));
}
static inline size_t mi_bsr(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x));
}
#endif
/* -----------------------------------------------------------
Claim a bit sequence atomically
@ -148,8 +107,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
const uintptr_t mask = mi_bitmap_mask_(count, 0);
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
#ifdef MI_HAVE_BITSCAN
size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
#ifdef MI_HAVE_FAST_BITSCAN
size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible
#else
size_t bitidx = 0; // otherwise start at 0
#endif
@ -157,7 +116,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
// scan linearly for a free range of zero bits
while (bitidx <= bitidx_max) {
if ((map & m) == 0) { // are the mask bits free at bitidx?
const uintptr_t mapm = map & m;
if (mapm == 0) { // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
const uintptr_t newmap = map | m;
mi_assert_internal((newmap^map) >> bitidx == mask);
@ -173,8 +133,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
}
else {
// on to the next bit range
#ifdef MI_HAVE_BITSCAN
const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
#ifdef MI_HAVE_FAST_BITSCAN
const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1);
mi_assert_internal(shift > 0 && shift <= count);
#else
const size_t shift = 1;
@ -270,8 +230,8 @@ static inline bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, siz
// check initial trailing zeros
_Atomic(uintptr_t)* field = &bitmap[idx];
uintptr_t map = mi_atomic_load_relaxed(field);
const uintptr_t bitidx = (map==0 ? 0 : mi_bsr(map) + 1);
const size_t initial = MI_BITMAP_FIELD_BITS - bitidx; // count of initial zeros starting at idx
const size_t initial = mi_clz(map); // count of initial zeros starting at idx
mi_assert_internal(initial >= 0 && initial <= MI_BITMAP_FIELD_BITS);
if (initial == 0) return false;
if (initial >= count) return mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields
if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries
@ -321,7 +281,7 @@ static inline bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, siz
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
// claimed!
*bitmap_idx = mi_bitmap_index_create(idx, bitidx);
*bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial);
return true;
rollback:

View file

@ -358,7 +358,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
int fd = -1;
#if defined(MAP_ALIGNED) // BSD
if (try_alignment > 0) {
size_t n = _mi_bsr(try_alignment);
size_t n = mi_bsr(try_alignment);
if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB
flags |= MAP_ALIGNED(n);
}

View file

@ -49,50 +49,6 @@ static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) {
Bins
----------------------------------------------------------- */
// Bit scan reverse: return the index of the highest bit.
static inline uint8_t mi_bsr32(uint32_t x);
#if defined(_MSC_VER)
#include <intrin.h>
static inline uint8_t mi_bsr32(uint32_t x) {
uint32_t idx;
_BitScanReverse((DWORD*)&idx, x);
return (uint8_t)idx;
}
#elif defined(__GNUC__) || defined(__clang__)
static inline uint8_t mi_bsr32(uint32_t x) {
return (31 - __builtin_clz(x));
}
#else
static inline uint8_t mi_bsr32(uint32_t x) {
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static const uint8_t debruijn[32] = {
31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12,
30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13,
};
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x++;
return debruijn[(x*0x076be629) >> 27];
}
#endif
// Bit scan reverse: return the index of the highest bit.
uint8_t _mi_bsr(uintptr_t x) {
if (x == 0) return 0;
#if MI_INTPTR_SIZE==8
uint32_t hi = (x >> 32);
return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi));
#elif MI_INTPTR_SIZE==4
return mi_bsr32(x);
#else
# error "define bsr for non-32 or 64-bit platforms"
#endif
}
// Return the bin for a given field size.
// Returns MI_BIN_HUGE if the size is too large.
// We use `wsize` for the size in "machine word sizes",
@ -125,7 +81,7 @@ extern inline uint8_t _mi_bin(size_t size) {
#endif
wsize--;
// find the highest bit
uint8_t b = mi_bsr32((uint32_t)wsize);
uint8_t b = (uint8_t)mi_bsr(wsize); // note: wsize != 0
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we use do not round the first 8 sizes
// which each get an exact bin