Skip to content

Commit

Permalink
Optimize BitOperations.PopCount() with arm64 intrinsics (dotnet#35636)
Browse files Browse the repository at this point in the history
* Intrinsify BitOperations.PopCount() for arm64

Co-authored-by: Tanner Gooding <[email protected]>
  • Loading branch information
kunalspathak and tannergooding authored May 8, 2020
1 parent 2973724 commit 83712df
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

Expand Down Expand Up @@ -243,6 +244,19 @@ public static int PopCount(uint value)
return (int)Popcnt.PopCount(value);
}

if (AdvSimd.Arm64.IsSupported)
{
// AdvSimd.PopCount operates on vectors, so convert the input value to a vector first.

// Vector64.CreateScalar(uint) generates suboptimal code by storing and
// loading the result to memory.
// See https://github.com/dotnet/runtime/issues/35976 for details.
// Hence use Vector64.Create(ulong) to create Vector64<ulong> and operate on that.
Vector64<ulong> input = Vector64.Create((ulong)value);
Vector64<byte> aggregated = AdvSimd.Arm64.AddAcross(AdvSimd.PopCount(input.AsByte()));
return AdvSimd.Extract(aggregated, 0);
}

return SoftwareFallback(value);

static int SoftwareFallback(uint value)
Expand Down Expand Up @@ -274,6 +288,14 @@ public static int PopCount(ulong value)
return (int)Popcnt.X64.PopCount(value);
}

if (AdvSimd.Arm64.IsSupported)
{
// AdvSimd.PopCount operates on vectors, so convert the input value to a vector first.
Vector64<ulong> input = Vector64.Create(value);
Vector64<byte> aggregated = AdvSimd.Arm64.AddAcross(AdvSimd.PopCount(input.AsByte()));
return AdvSimd.Extract(aggregated, 0);
}

#if TARGET_32BIT
return PopCount((uint)value) // lo
+ PopCount((uint)(value >> 32)); // hi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@

namespace System.Runtime.Intrinsics
{
/// <summary>
/// Placeholder for the <c>Vector64</c> helper methods. Every method declared here
/// unconditionally throws <see cref="PlatformNotSupportedException"/>.
/// </summary>
internal static class Vector64
{
    /// <summary>Placeholder for creating a vector from a <see cref="ulong"/>; never returns.</summary>
    /// <param name="value">Ignored by this placeholder.</param>
    /// <returns>Never returns normally.</returns>
    /// <exception cref="PlatformNotSupportedException">Always thrown.</exception>
    public static Vector64<ulong> Create(ulong value)
    {
        throw new PlatformNotSupportedException();
    }

    /// <summary>Placeholder for reinterpreting a vector as a vector of bytes; never returns.</summary>
    /// <typeparam name="T">Element type of the input vector.</typeparam>
    /// <param name="vector">Ignored by this placeholder.</param>
    /// <returns>Never returns normally.</returns>
    /// <exception cref="PlatformNotSupportedException">Always thrown.</exception>
    public static Vector64<byte> AsByte<T>(this Vector64<T> vector)
        where T : struct
    {
        throw new PlatformNotSupportedException();
    }
}

/// <summary>
/// Empty placeholder for the generic <c>Vector64</c> value type; it declares no fields or members.
/// </summary>
/// <typeparam name="T">Element type; constrained to value types.</typeparam>
internal readonly struct Vector64<T>
    where T : struct
{
}

internal static class Vector128
{
public static Vector128<short> Create(short value) => throw new PlatformNotSupportedException();
Expand Down Expand Up @@ -130,4 +140,14 @@ public abstract class Arm64
public static int LeadingZeroCount(uint value) => throw new PlatformNotSupportedException();
public static uint ReverseElementBits(uint value) => throw new PlatformNotSupportedException();
}

/// <summary>
/// Placeholder for the <c>AdvSimd</c> intrinsics class. Every method declared here
/// unconditionally throws <see cref="PlatformNotSupportedException"/>.
/// </summary>
internal abstract class AdvSimd : ArmBase
{
    /// <summary>Placeholder for the vector population-count intrinsic; never returns.</summary>
    /// <param name="value">Ignored by this placeholder.</param>
    /// <returns>Never returns normally.</returns>
    /// <exception cref="PlatformNotSupportedException">Always thrown.</exception>
    public static Vector64<byte> PopCount(Vector64<byte> value)
    {
        throw new PlatformNotSupportedException();
    }

    /// <summary>Placeholder for the element-extraction intrinsic; never returns.</summary>
    /// <param name="vector">Ignored by this placeholder.</param>
    /// <param name="index">Ignored by this placeholder.</param>
    /// <returns>Never returns normally.</returns>
    /// <exception cref="PlatformNotSupportedException">Always thrown.</exception>
    public static byte Extract(Vector64<byte> vector, byte index)
    {
        throw new PlatformNotSupportedException();
    }

    /// <summary>
    /// Placeholder for the 64-bit-only subset of the intrinsics; hides the nested
    /// <c>Arm64</c> class inherited from <c>ArmBase</c>.
    /// </summary>
    public new abstract class Arm64 : ArmBase.Arm64
    {
        /// <summary>Placeholder for the add-across-vector intrinsic; never returns.</summary>
        /// <param name="value">Ignored by this placeholder.</param>
        /// <returns>Never returns normally.</returns>
        /// <exception cref="PlatformNotSupportedException">Always thrown.</exception>
        public static Vector64<byte> AddAcross(Vector64<byte> value)
        {
            throw new PlatformNotSupportedException();
        }
    }
}
}

0 comments on commit 83712df

Please sign in to comment.