Skip to content

Commit

Permalink
Cleanup bitwise ops.
Browse files Browse the repository at this point in the history
  • Loading branch information
bkaradzic committed Nov 23, 2023
1 parent 837de05 commit 08baf8f
Show file tree
Hide file tree
Showing 6 changed files with 346 additions and 183 deletions.
170 changes: 167 additions & 3 deletions include/bx/inline/math.inl
Original file line number Diff line number Diff line change
Expand Up @@ -212,16 +212,180 @@ namespace bx
return pow(2.0f, _a);
}

/// Base 2 logarithm of _a for float: the natural logarithm of _a
/// multiplied by kInvLogNat2.
///
template<>
inline BX_CONST_FUNC float log2(float _a)
{
	const auto naturalLog = log(_a);
	return naturalLog * kInvLogNat2;
}

template<>
inline BX_CONST_FUNC int32_t log2(int32_t _a)
inline BX_CONSTEXPR_FUNC uint8_t countBits(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
return __builtin_popcount(_val);
#else
const uint32_t tmp0 = uint32_srl(_val, 1);
const uint32_t tmp1 = uint32_and(tmp0, 0x55555555);
const uint32_t tmp2 = uint32_sub(_val, tmp1);
const uint32_t tmp3 = uint32_and(tmp2, 0xc30c30c3);
const uint32_t tmp4 = uint32_srl(tmp2, 2);
const uint32_t tmp5 = uint32_and(tmp4, 0xc30c30c3);
const uint32_t tmp6 = uint32_srl(tmp2, 4);
const uint32_t tmp7 = uint32_and(tmp6, 0xc30c30c3);
const uint32_t tmp8 = uint32_add(tmp3, tmp5);
const uint32_t tmp9 = uint32_add(tmp7, tmp8);
const uint32_t tmpA = uint32_srl(tmp9, 6);
const uint32_t tmpB = uint32_add(tmp9, tmpA);
const uint32_t tmpC = uint32_srl(tmpB, 12);
const uint32_t tmpD = uint32_srl(tmpB, 24);
const uint32_t tmpE = uint32_add(tmpB, tmpC);
const uint32_t tmpF = uint32_add(tmpD, tmpE);
const uint32_t result = uint32_and(tmpF, 0x3f);

return result;
#endif // BX_COMPILER_*
}

/// Count number of bits set in an unsigned long long value.
///
template<>
inline BX_CONSTEXPR_FUNC uint8_t countBits(unsigned long long _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	return __builtin_popcountll(_val);
#else
	// No builtin available: count the low and the high 32 bits separately
	// and add the two results.
	const uint32_t upperHalf = uint32_t(_val>>32);
	const uint32_t lowerHalf = uint32_t(_val&UINT32_MAX);
	const auto     total     = uint32_cntbits(lowerHalf) + uint32_cntbits(upperHalf);

	return total;
#endif // BX_COMPILER_*
}

// Forwarding specializations of countBits.
//
// unsigned long is widened to unsigned long long; the narrow unsigned types
// are widened to uint32_t; every signed type is first converted to the
// unsigned type of the same width and then forwarded.

template<>
inline BX_CONSTEXPR_FUNC uint8_t countBits(unsigned long _val)
{
	const unsigned long long widened = _val;
	return countBits<unsigned long long>(widened);
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countBits(uint8_t _val)
{
	return countBits<uint32_t>(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countBits(int8_t _val)
{
	return countBits<uint8_t>(uint8_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countBits(uint16_t _val)
{
	return countBits<uint32_t>(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countBits(int16_t _val)
{
	return countBits<uint16_t>(uint16_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countBits(int32_t _val)
{
	return countBits<uint32_t>(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countBits(int64_t _val)
{
	return countBits<uint64_t>(uint64_t(_val) );
}

/// Count number of leading zeros in a 32-bit value. Returns 32 when _val is 0.
///
template<>
inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered directly rather than handed to the builtin
	// (GCC documents the result of __builtin_clz(0) as undefined).
	return 0 == _val ? 32 : __builtin_clz(_val);
#else
	// NOTE(review): assumes uint32_srl/uint32_or/uint32_not are plain
	// shift-right/or/not wrappers -- confirm against uint32_t.inl.
	//
	// Copy the highest set bit into every position below it...
	const uint32_t fill1  = uint32_or(uint32_srl(_val,   1), _val );
	const uint32_t fill2  = uint32_or(uint32_srl(fill1,  2), fill1);
	const uint32_t fill4  = uint32_or(uint32_srl(fill2,  4), fill2);
	const uint32_t fill8  = uint32_or(uint32_srl(fill4,  8), fill4);
	const uint32_t fill16 = uint32_or(uint32_srl(fill8, 16), fill8);

	// ...then the bits that remain clear are the leading zeros; count them.
	return uint32_cntbits(uint32_not(fill16) );
#endif // BX_COMPILER_*
}

/// Count number of leading zeros in an unsigned long long value.
///
template<>
inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(unsigned long long _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered directly (64) instead of being passed to the builtin.
	return 0 == _val ? 64 : __builtin_clzll(_val);
#else
	// Any bit set in the upper 32 bits means the answer lies entirely there.
	if (_val & UINT64_C(0xffffffff00000000) )
	{
		return uint32_cntlz(uint32_t(_val>>32) );
	}

	// Upper 32 bits are all zero: 32 leading zeros plus those of the lower half.
	return uint32_cntlz(uint32_t(_val) ) + 32;
#endif // BX_COMPILER_*
}

// Forwarding specializations of countLeadingZeros.
//
// The 8-bit and 16-bit versions reuse the 32-bit implementation and subtract
// the 24 / 16 extra zero bits introduced by widening. Signed types are
// converted to the unsigned type of the same width before forwarding.

template<>
inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(unsigned long _val)
{
	const unsigned long long widened = _val;
	return countLeadingZeros<unsigned long long>(widened);
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(uint8_t _val)
{
	return countLeadingZeros<uint32_t>(uint32_t(_val) )-24;
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(int8_t _val)
{
	return countLeadingZeros<uint8_t>(uint8_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(uint16_t _val)
{
	return countLeadingZeros<uint32_t>(uint32_t(_val) )-16;
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(int16_t _val)
{
	return countLeadingZeros<uint16_t>(uint16_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(int32_t _val)
{
	return countLeadingZeros<uint32_t>(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(int64_t _val)
{
	return countLeadingZeros<uint64_t>(uint64_t(_val) );
}

/// Count number of trailing zeros in a 32-bit value. Returns 32 when _val is 0.
///
template<>
inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered directly (32) instead of being passed to the builtin.
	return 0 == _val ? 32 : __builtin_ctz(_val);
#else
	// NOTE(review): assumes uint32_not/uint32_dec/uint32_and are plain
	// not/decrement/and wrappers -- confirm against uint32_t.inl.
	//
	// (~_val) & (_val - 1) keeps only the bits below the lowest set bit,
	// so counting them gives the number of trailing zeros.
	const uint32_t belowLowestSet = uint32_and(uint32_not(_val), uint32_dec(_val) );

	return uint32_cntbits(belowLowestSet);
#endif // BX_COMPILER_*
}

/// Count number of trailing zeros in an unsigned long long value.
///
template<>
inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(unsigned long long _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered directly (64) instead of being passed to the builtin.
	return 0 == _val ? 64 : __builtin_ctzll(_val);
#else
	// If the low 32 bits contain a set bit the answer lies entirely there;
	// otherwise all 32 low bits are trailing zeros and the count continues
	// in the high 32 bits.
	return _val & UINT64_C(0xffffffff)
		? uint32_cnttz(uint32_t(_val) )
		: uint32_cnttz(uint32_t(_val>>32) ) + 32
		;
#endif // BX_COMPILER_*
}

// Forwarding specializations of countTrailingZeros.
//
// The 8-bit and 16-bit versions reuse the 32-bit implementation and clamp the
// result to the width of the narrow type. Signed types are converted to the
// unsigned type of the same width before forwarding.
//
// NOTE(review): bx::min is called with an `unsigned int` literal and a uint8_t
// result -- confirm bx::min accepts mixed argument types.

template<>
inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(unsigned long _val)
{
	const unsigned long long widened = _val;
	return countTrailingZeros<unsigned long long>(widened);
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(uint8_t _val)
{
	return bx::min(8u, countTrailingZeros<uint32_t>(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(int8_t _val)
{
	return countTrailingZeros<uint8_t>(uint8_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(uint16_t _val)
{
	return bx::min(16u, countTrailingZeros<uint32_t>(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(int16_t _val)
{
	return countTrailingZeros<uint16_t>(uint16_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(int32_t _val)
{
	return countTrailingZeros<uint32_t>(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(int64_t _val)
{
	return countTrailingZeros<uint64_t>(uint64_t(_val) );
}

/// Find first set: 1-based position of the least significant set bit of _x,
/// or 0 when _x has no bit set.
///
template<typename Ty>
inline BX_CONSTEXPR_FUNC uint8_t findFirstSet(Ty _x)
{
	if (Ty(0) == _x)
	{
		return uint8_t(0);
	}

	// Trailing-zero count is the 0-based index of the lowest set bit.
	return countTrailingZeros<Ty>(_x) + 1;
}

/// Base 2 logarithm of _a rounded up to the next integer.
/// Values less than 1 yield 0.
///
template<typename Ty>
inline BX_CONSTEXPR_FUNC uint8_t ceilLog2(Ty _a)
{
	BX_STATIC_ASSERT(isInteger<Ty>(), "Type Ty must be of integer type!");

	if (Ty(_a) < Ty(1) )
	{
		return Ty(0);
	}

	// Number of bits needed to represent (_a - 1).
	const auto numBits = sizeof(Ty)*8;
	return numBits - countLeadingZeros<Ty>(_a - 1);
}

/// Returns the smallest power of two that is greater than or equal to _a,
/// computed as 1 << ceilLog2(_a).
///
/// Asserts when the result would need more bits than Ty provides.
///
template<typename Ty>
inline BX_CONSTEXPR_FUNC Ty nextPow2(Ty _a)
{
	const uint8_t exponent = ceilLog2(_a);

	// NOTE(review): BX_ASSERT is assumed to take printf-style arguments; the
	// explicit uint32_t casts make both values match their %u conversions
	// (sizeof yields size_t, which is wider than unsigned int on 64-bit targets).
	BX_ASSERT(exponent < sizeof(Ty)*8
		, "Type Ty cannot represent the next power-of-two value (1<<%u is larger than %u-bit type)."
		, uint32_t(exponent)
		, uint32_t(sizeof(Ty)*8)
		);

	return Ty(1)<<exponent;
}

inline BX_CONST_FUNC float rsqrtRef(float _a)
Expand Down
124 changes: 31 additions & 93 deletions include/bx/inline/uint32_t.inl
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,6 @@ namespace bx
return result;
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
Expand Down Expand Up @@ -335,35 +334,6 @@ namespace bx
#endif // BX_COMPILER_*
}

/// Count number of bits set in an unsigned long long value.
///
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(unsigned long long _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	return __builtin_popcountll(_val);
#else
	// No builtin available: count the low and the high 32 bits separately
	// and add the two results.
	const uint32_t upperHalf = uint32_t(_val>>32);
	const uint32_t lowerHalf = uint32_t(_val&UINT32_MAX);
	const auto     total     = uint32_cntbits(lowerHalf) + uint32_cntbits(upperHalf);

	return total;
#endif // BX_COMPILER_*
}

// Forwarding specializations of uint32_cntbits.
//
// unsigned long is widened to unsigned long long; the narrow unsigned types
// are widened to uint32_t; every signed type is first converted to the
// unsigned type of the same width and then forwarded.

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(unsigned long _val)
{
	const unsigned long long widened = _val;
	return uint32_cntbits<unsigned long long>(widened);
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint8_t _val)
{
	return uint32_cntbits<uint32_t>(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int8_t _val)
{
	return uint32_cntbits<uint8_t>(uint8_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(uint16_t _val)
{
	return uint32_cntbits<uint32_t>(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int16_t _val)
{
	return uint32_cntbits<uint16_t>(uint16_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int32_t _val)
{
	return uint32_cntbits<uint32_t>(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntbits(int64_t _val)
{
	return uint32_cntbits<uint64_t>(uint64_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
Expand All @@ -386,33 +356,6 @@ namespace bx
#endif // BX_COMPILER_*
}

/// Count number of leading zeros in an unsigned long long value.
///
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(unsigned long long _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered directly (64) instead of being passed to the builtin.
	return 0 == _val ? 64 : __builtin_clzll(_val);
#else
	// Any bit set in the upper 32 bits means the answer lies entirely there.
	if (_val & UINT64_C(0xffffffff00000000) )
	{
		return uint32_cntlz(uint32_t(_val>>32) );
	}

	// Upper 32 bits are all zero: 32 leading zeros plus those of the lower half.
	return uint32_cntlz(uint32_t(_val) ) + 32;
#endif // BX_COMPILER_*
}

// Forwarding specializations of uint32_cntlz.
//
// The 8-bit and 16-bit versions reuse the 32-bit implementation and subtract
// the 24 / 16 extra zero bits introduced by widening. Signed types are
// converted to the unsigned type of the same width before forwarding.

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(unsigned long _val)
{
	const unsigned long long widened = _val;
	return uint32_cntlz<unsigned long long>(widened);
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint8_t _val)
{
	return uint32_cntlz<uint32_t>(uint32_t(_val) )-24;
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int8_t _val)
{
	return uint32_cntlz<uint8_t>(uint8_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(uint16_t _val)
{
	return uint32_cntlz<uint32_t>(uint32_t(_val) )-16;
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int16_t _val)
{
	return uint32_cntlz<uint16_t>(uint16_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int32_t _val)
{
	return uint32_cntlz<uint32_t>(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cntlz(int64_t _val)
{
	return uint32_cntlz<uint64_t>(uint64_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint32_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
Expand All @@ -427,39 +370,11 @@ namespace bx
#endif // BX_COMPILER_*
}

/// Count number of trailing zeros in an unsigned long long value.
///
template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(unsigned long long _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered directly (64) instead of being passed to the builtin.
	return 0 == _val ? 64 : __builtin_ctzll(_val);
#else
	// If the low 32 bits contain a set bit the answer lies entirely there;
	// otherwise all 32 low bits are trailing zeros and the count continues
	// in the high 32 bits.
	return _val & UINT64_C(0xffffffff)
		? uint32_cnttz(uint32_t(_val) )
		: uint32_cnttz(uint32_t(_val>>32) ) + 32
		;
#endif // BX_COMPILER_*
}

// Forwarding specializations of uint32_cnttz.
//
// The 8-bit and 16-bit versions reuse the 32-bit implementation and clamp the
// result to the width of the narrow type. Signed types are converted to the
// unsigned type of the same width before forwarding.

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(unsigned long _val)
{
	const unsigned long long widened = _val;
	return uint32_cnttz<unsigned long long>(widened);
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint8_t _val)
{
	return bx::min(8u, uint32_cnttz<uint32_t>(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int8_t _val)
{
	return uint32_cnttz<uint8_t>(uint8_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(uint16_t _val)
{
	return bx::min(16u, uint32_cnttz<uint32_t>(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int16_t _val)
{
	return uint32_cnttz<uint16_t>(uint16_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int32_t _val)
{
	return uint32_cnttz<uint32_t>(uint32_t(_val) );
}

template<>
inline BX_CONSTEXPR_FUNC uint32_t uint32_cnttz(int64_t _val)
{
	return uint32_cnttz<uint64_t>(uint64_t(_val) );
}

/// Find first set: 1-based position of the least significant set bit of _x,
/// or 0 when _x has no bit set.
///
template<typename Ty>
inline BX_CONSTEXPR_FUNC uint32_t uint32_ffs(Ty _x)
{
	// Trailing-zero count is the 0-based index of the lowest set bit.
	return Ty(0) == _x ? uint32_t(0) : uint32_cnttz<Ty>(_x) + 1;
}


inline BX_CONSTEXPR_FUNC uint32_t uint32_part1by1(uint32_t _a)
{
// shuffle:
Expand Down Expand Up @@ -644,19 +559,42 @@ namespace bx
return _a * _b;
}

/// Count number of bits set in a 64-bit value.
///
inline BX_CONSTEXPR_FUNC uint64_t uint64_cntbits(uint64_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	return __builtin_popcountll(_val);
#else
	// No builtin available: count the low and the high 32 bits separately
	// and add the two results.
	const uint32_t lo = uint32_t(_val&UINT32_MAX);
	const uint32_t hi = uint32_t(_val>>32);

	return uint32_cntbits(lo)
		+  uint32_cntbits(hi)
		;
#endif // BX_COMPILER_*
}

/// Count number of leading zeros in a 64-bit value. Returns 64 when _val is 0
/// on the builtin path.
///
inline BX_CONSTEXPR_FUNC uint64_t uint64_cntlz(uint64_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered directly (64) instead of being passed to the builtin.
	return 0 == _val ? 64 : __builtin_clzll(_val);
#else
	// Any bit set in the upper 32 bits means the answer lies entirely there;
	// otherwise add 32 to the count taken from the lower 32 bits.
	return _val & UINT64_C(0xffffffff00000000)
		? uint32_cntlz(uint32_t(_val>>32) )
		: uint32_cntlz(uint32_t(_val) ) + 32
		;
#endif // BX_COMPILER_*
}

/// Count number of trailing zeros in a 64-bit value. Returns 64 when _val is 0
/// on the builtin path.
///
inline BX_CONSTEXPR_FUNC uint64_t uint64_cnttz(uint64_t _val)
{
#if BX_COMPILER_GCC || BX_COMPILER_CLANG
	// Zero is answered directly (64) instead of being passed to the builtin.
	return 0 == _val ? 64 : __builtin_ctzll(_val);
#else
	// If the low 32 bits contain a set bit the answer lies entirely there;
	// otherwise add 32 to the count taken from the high 32 bits.
	return _val & UINT64_C(0xffffffff)
		? uint32_cnttz(uint32_t(_val) )
		: uint32_cnttz(uint32_t(_val>>32) ) + 32
		;
#endif // BX_COMPILER_*
}

inline BX_CONSTEXPR_FUNC uint32_t uint32_gcd(uint32_t _a, uint32_t _b)
Expand Down
31 changes: 30 additions & 1 deletion include/bx/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,37 @@ namespace bx

/// Returns the base 2 logarithm of _a.
///
BX_CONST_FUNC float log2(float _a);

/// Count number of bits set.
///
template<typename Ty>
BX_CONSTEXPR_FUNC uint8_t countBits(Ty _val);

/// Count number of leading zeros.
///
template<typename Ty>
BX_CONSTEXPR_FUNC uint8_t countLeadingZeros(Ty _val);

/// Count number of trailing zeros.
///
template<typename Ty>
BX_CONSTEXPR_FUNC uint8_t countTrailingZeros(Ty _val);

/// Find first set. Returns the 1-based position of the least significant set
/// bit, or 0 when no bit is set.
///
template<typename Ty>
BX_CONSTEXPR_FUNC uint8_t findFirstSet(Ty _val);

/// Returns the base 2 logarithm of _a rounded up to the next integer
/// (0 when _a is less than 1).
///
template<typename Ty>
BX_CONSTEXPR_FUNC uint8_t ceilLog2(Ty _a);

/// Returns the base 2 logarithm of _a.
///
template<typename Ty>
BX_CONST_FUNC Ty log2(Ty _a);

/// Returns the smallest power of two that is greater than or equal to _a.
///
template<typename Ty>
BX_CONSTEXPR_FUNC Ty nextPow2(Ty _a);

/// Returns the square root of _a.
///
Expand Down
Loading

0 comments on commit 08baf8f

Please sign in to comment.