fix 32 bit multiply in generic ctz/clz

This commit is contained in:
daanx 2024-12-09 19:43:00 -08:00
parent f28d5c7029
commit 56a1bd7f9e
2 changed files with 4 additions and 4 deletions

View file

@ -220,7 +220,7 @@ static inline size_t mi_popcount(size_t x) {
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bsf(size_t x, size_t* idx) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9)
// on x64 the carry flag is set on zero which gives better codegen
bool is_zero;
__asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" );
@ -237,7 +237,7 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bsr(size_t x, size_t* idx) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9)
// on x64 the carry flag is set on zero which gives better codegen
bool is_zero;
__asm ("lzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc");

View file

@ -289,7 +289,7 @@ static size_t mi_ctz_generic32(uint32_t x) {
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};
if (x==0) return 32;
return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27];
return debruijn[((x & -(int32_t)x) * (uint32_t)(0x077CB531U)) >> 27];
}
static size_t mi_clz_generic32(uint32_t x) {
@ -304,7 +304,7 @@ static size_t mi_clz_generic32(uint32_t x) {
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27];
return debruijn[(uint32_t)(x * (uint32_t)(0x07C4ACDDU)) >> 27];
}
size_t _mi_clz_generic(size_t x) {