Skip to content

Commit

Permalink
Consolidate implementation of Rotate and PopCount (dotnet/coreclr#22584)
Browse files Browse the repository at this point in the history
* Perf: BitOps.LeadingZeroCount

* Remove redundant MSIL cast, conv.u8

* Use local functions for SoftwareFallback

* Target BIT32/64

Commit migrated from dotnet/coreclr@b4f99f2
  • Loading branch information
grant-d authored and jkotas committed Feb 14, 2019
1 parent fd64168 commit 441fb94
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 72 deletions.
152 changes: 136 additions & 16 deletions src/libraries/System.Private.CoreLib/src/System/BitOps.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@

namespace System
{
/// <summary>
/// Utility methods for intrinsic bit-twiddling operations.
/// The methods use hardware intrinsics when available on the underlying platform,
/// otherwise they use optimized software fallbacks.
/// </summary>
internal static class BitOps
{
// C# no-alloc optimization that directly wraps the data section of the dll (similar to string constants)
Expand Down Expand Up @@ -53,11 +58,11 @@ public static int TrailingZeroCount(uint value)
{
if (Bmi1.IsSupported)
{
// Note that TZCNT contract specifies 0->32
// TZCNT contract is 0->32
return (int)Bmi1.TrailingZeroCount(value);
}

// Software fallback has behavior 0->0, so special-case to match intrinsic path 0->32
// Unguarded fallback contract is 0->0
if (value == 0)
{
return 32;
Expand All @@ -67,8 +72,8 @@ public static int TrailingZeroCount(uint value)
return Unsafe.AddByteOffset(
// Using deBruijn sequence, k=2, n=5 (2^5=32) : 0b_0000_0111_0111_1100_1011_0101_0011_0001u
ref MemoryMarshal.GetReference(s_TrailingZeroCountDeBruijn),
// long -> IntPtr cast on 32-bit platforms is expensive - it does overflow checks not needed here
(IntPtr)(int)(((uint)((value & -value) * 0x077CB531u)) >> 27)); // shift over long also expensive on 32-bit
// uint|long -> IntPtr cast on 32-bit platforms does expensive overflow checks not needed here
(IntPtr)(int)(((value & (uint)-(int)value) * 0x077CB531u) >> 27)); // Multi-cast mitigates redundant conv.u8
}

/// <summary>
Expand All @@ -90,7 +95,7 @@ public static int TrailingZeroCount(ulong value)
{
if (Bmi1.X64.IsSupported)
{
// Note that TZCNT contract specifies 0->64
// TZCNT contract is 0->64
return (int)Bmi1.X64.TrailingZeroCount(value);
}

Expand All @@ -114,17 +119,17 @@ public static int LeadingZeroCount(uint value)
{
if (Lzcnt.IsSupported)
{
// Note that LZCNT contract specifies 0->32
// LZCNT contract is 0->32
return (int)Lzcnt.LeadingZeroCount(value);
}

// Software fallback has behavior 0->0, so special-case to match intrinsic path 0->32
// Unguarded fallback contract is 0->31
if (value == 0)
{
return 32;
}

return 31 - Log2(value);
return 31 - Log2SoftwareFallback(value);
}

/// <summary>
Expand All @@ -137,7 +142,7 @@ public static int LeadingZeroCount(ulong value)
{
if (Lzcnt.X64.IsSupported)
{
// Note that LZCNT contract specifies 0->64
// LZCNT contract is 0->64
return (int)Lzcnt.X64.LeadingZeroCount(value);
}

Expand Down Expand Up @@ -168,30 +173,32 @@ public static int Log2(uint value)
// 1000.. 0 31-0 31
if (Lzcnt.IsSupported)
{
// Enforce conventional contract 0->0 (since Log(0) is undefined)
// Enforce conventional contract 0->0 (Log(0) is undefined)
if (value == 0)
{
return 0;
}

// Note that LZCNT contract specifies 0->32
// LZCNT contract is 0->32
return 31 - (int)Lzcnt.LeadingZeroCount(value);
}

// Already has contract 0->0, without branching
// Fallback contract is 0->0
return Log2SoftwareFallback(value);
}

/// <summary>
/// Returns the integer (floor) log of the specified value, base 2.
/// Note that by convention, input value 0 returns 0 since Log(0) is undefined.
/// Does not incur branching.
/// Does not directly use any hardware intrinsics, nor does it incur branching.
/// </summary>
/// <param name="value">The value.</param>
private static int Log2SoftwareFallback(uint value)
{
// No AggressiveInlining due to large method size
// Has conventional contract 0->0 (Log(0) is undefined)

// Fill trailing zeros with ones, eg 00010010 becomes 00011111
value |= value >> 01;
value |= value >> 02;
value |= value >> 04;
Expand All @@ -202,7 +209,7 @@ private static int Log2SoftwareFallback(uint value)
return Unsafe.AddByteOffset(
// Using deBruijn sequence, k=2, n=5 (2^5=32) : 0b_0000_0111_1100_0100_1010_1100_1101_1101u
ref MemoryMarshal.GetReference(s_Log2DeBruijn),
// long -> IntPtr cast on 32-bit platforms is expensive - it does overflow checks not needed here
// uint|long -> IntPtr cast on 32-bit platforms does expensive overflow checks not needed here
(IntPtr)(int)((value * 0x07C4ACDDu) >> 27));
}

Expand All @@ -216,13 +223,13 @@ public static int Log2(ulong value)
{
if (Lzcnt.X64.IsSupported)
{
// Enforce conventional contract 0->0 (since Log(0) is undefined)
// Enforce conventional contract 0->0 (Log(0) is undefined)
if (value == 0)
{
return 0;
}

// Note that LZCNT contract specifies 0->64
// LZCNT contract is 0->64
return 63 - (int)Lzcnt.X64.LeadingZeroCount(value);
}

Expand All @@ -235,5 +242,118 @@ public static int Log2(ulong value)

return 32 + Log2(hi);
}

/// <summary>
/// Rotates the specified value left by the specified number of bits.
/// Similar in behavior to the x86 instruction ROL.
/// </summary>
/// <param name="value">The value to rotate.</param>
/// <param name="offset">The number of bits to rotate by.
/// Any value outside the range [0..31] is treated as congruent mod 32.</param>
/// <returns>The rotated value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint RotateLeft(uint value, int offset)
=> (value << offset) | (value >> (32 - offset));

/// <summary>
/// Rotates the specified value left by the specified number of bits.
/// Similar in behavior to the x86 instruction ROL.
/// </summary>
/// <param name="value">The value to rotate.</param>
/// <param name="offset">The number of bits to rotate by.
/// Any value outside the range [0..63] is treated as congruent mod 64.</param>
/// <returns>The rotated value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong RotateLeft(ulong value, int offset)
=> (value << offset) | (value >> (64 - offset));

/// <summary>
/// Rotates the specified value right by the specified number of bits.
/// Similar in behavior to the x86 instruction ROR.
/// </summary>
/// <param name="value">The value to rotate.</param>
/// <param name="offset">The number of bits to rotate by.
/// Any value outside the range [0..31] is treated as congruent mod 32.</param>
/// <returns>The rotated value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint RotateRight(uint value, int offset)
=> (value >> offset) | (value << (32 - offset));

/// <summary>
/// Rotates the specified value right by the specified number of bits.
/// Similar in behavior to the x86 instruction ROR.
/// </summary>
/// <param name="value">The value to rotate.</param>
/// <param name="offset">The number of bits to rotate by.
/// Any value outside the range [0..63] is treated as congruent mod 64.</param>
/// <returns>The rotated value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong RotateRight(ulong value, int offset)
=> (value >> offset) | (value << (64 - offset));

/// <summary>
/// Returns the population count (number of bits set) of a mask.
/// Similar in behavior to the x86 instruction POPCNT.
/// </summary>
/// <param name="value">The value.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int PopCount(uint value)
{
if (Popcnt.IsSupported)
{
return (int)Popcnt.PopCount(value);
}

return SoftwareFallback(value);

int SoftwareFallback(uint v)
{
const uint c1 = 0x_55555555u;
const uint c2 = 0x_33333333u;
const uint c3 = 0x_0F0F0F0Fu;
const uint c4 = 0x_01010101u;

v = v - ((v >> 1) & c1);
v = (v & c2) + ((v >> 2) & c2);
v = (((v + (v >> 4)) & c3) * c4) >> 24;

return (int)v;
}
}

/// <summary>
/// Returns the population count (number of bits set) of a mask.
/// Similar in behavior to the x86 instruction POPCNT.
/// </summary>
/// <param name="value">The value.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int PopCount(ulong value)
{
if (Popcnt.X64.IsSupported)
{
return (int)Popcnt.X64.PopCount(value);
}

#if BIT32
return PopCount((uint)value) // lo
+ PopCount((uint)(value >> 32)); // hi
#else
return SoftwareFallback(value);

int SoftwareFallback(ulong v)
{
const ulong c1 = 0x_55555555_55555555ul;
const ulong c2 = 0x_33333333_33333333ul;
const ulong c3 = 0x_0F0F0F0F_0F0F0F0Ful;
const ulong c4 = 0x_01010101_01010101ul;

v = v - ((v >> 1) & c1);
v = (v & c2) + ((v >> 2) & c2);
v = (((v + (v >> 4)) & c3) * c4) >> 56;

return (int)v;
}
#endif
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,8 @@ public static uint ReverseEndianness(uint value)
// Testing shows that throughput increases if the AND
// is performed before the ROL / ROR.

uint mask_xx_zz = (value & 0x00FF00FFU);
uint mask_ww_yy = (value & 0xFF00FF00U);
return ((mask_xx_zz >> 8) | (mask_xx_zz << 24))
+ ((mask_ww_yy << 8) | (mask_ww_yy >> 24));
return BitOps.RotateRight(value & 0x00FF00FFu, 8) // xx zz
+ BitOps.RotateLeft(value & 0xFF00FF00u, 8); // ww yy
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,9 @@ public static int CountDigits(uint value)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int CountHexDigits(ulong value)
{
int right = 64 - BitOps.LeadingZeroCount(value | 1);
return (right + 3) >> 2;
return (64 - BitOps.LeadingZeroCount(value | 1) + 3) >> 2;
}


// Counts the number of trailing '0' digits in a decimal number.
// e.g., value = 0 => retVal = 0, valueWithoutTrailingZeros = 0
// value = 1234 => retVal = 0, valueWithoutTrailingZeros = 1234
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1579,7 +1579,7 @@ private void Drain()
{
for (int i = 16; i != 80; i++)
{
this.w[i] = Rol1((this.w[i - 3] ^ this.w[i - 8] ^ this.w[i - 14] ^ this.w[i - 16]));
this.w[i] = BitOps.RotateLeft((this.w[i - 3] ^ this.w[i - 8] ^ this.w[i - 14] ^ this.w[i - 16]), 1);
}

unchecked
Expand All @@ -1594,28 +1594,28 @@ private void Drain()
{
const uint k = 0x5A827999;
uint f = (b & c) | ((~b) & d);
uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
}

for (int i = 20; i != 40; i++)
{
uint f = b ^ c ^ d;
const uint k = 0x6ED9EBA1;
uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
}

for (int i = 40; i != 60; i++)
{
uint f = (b & c) | (b & d) | (c & d);
const uint k = 0x8F1BBCDC;
uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
}

for (int i = 60; i != 80; i++)
{
uint f = b ^ c ^ d;
const uint k = 0xCA62C1D6;
uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
}

this.w[80] += a;
Expand All @@ -1628,21 +1628,6 @@ private void Drain()
this.length += 512; // 64 bytes == 512 bits
this.pos = 0;
}

private static uint Rol1(uint input)
{
return (input << 1) | (input >> 31);
}

private static uint Rol5(uint input)
{
return (input << 5) | (input >> 27);
}

private static uint Rol30(uint input)
{
return (input << 30) | (input >> 2);
}
}

private static Guid GenerateGuidFromName(string name)
Expand Down
14 changes: 3 additions & 11 deletions src/libraries/System.Private.CoreLib/src/System/HashCode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -252,10 +252,6 @@ public static int Combine<T1, T2, T3, T4, T5, T6, T7, T8>(T1 value1, T2 value2,
return (int)hash;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint Rol(uint value, int count)
=> (value << count) | (value >> (32 - count));

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void Initialize(out uint v1, out uint v2, out uint v3, out uint v4)
{
Expand All @@ -268,23 +264,19 @@ private static void Initialize(out uint v1, out uint v2, out uint v3, out uint v
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint Round(uint hash, uint input)
{
hash += input * Prime2;
hash = Rol(hash, 13);
hash *= Prime1;
return hash;
return BitOps.RotateLeft(hash + input * Prime2, 13) * Prime1;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint QueueRound(uint hash, uint queuedValue)
{
hash += queuedValue * Prime3;
return Rol(hash, 17) * Prime4;
return BitOps.RotateLeft(hash + queuedValue * Prime3, 17) * Prime4;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint MixState(uint v1, uint v2, uint v3, uint v4)
{
return Rol(v1, 1) + Rol(v2, 7) + Rol(v3, 12) + Rol(v4, 18);
return BitOps.RotateLeft(v1, 1) + BitOps.RotateLeft(v2, 7) + BitOps.RotateLeft(v3, 12) + BitOps.RotateLeft(v4, 18);
}

private static uint MixEmptyState()
Expand Down
Loading

0 comments on commit 441fb94

Please sign in to comment.