Skip to content

Commit

Permalink
Vectorize Span.Reverse for Arm64 by using Vector128 APIs (dotnet#72780)
Browse files: browse the repository at this point in the history
  • Loading branch information
SwapnilGaikwad authored Aug 10, 2022
1 parent ee5b738 commit f244adb
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2347,9 +2347,8 @@ public static void Reverse(ref byte buf, nuint length)
buf = ref Unsafe.Add(ref buf, numIters * numElements);
length -= numIters * numElements * 2;
}
else if (Ssse3.IsSupported && (nuint)Vector128<byte>.Count * 2 <= length)
else if (Vector128.IsHardwareAccelerated && (nuint)Vector128<byte>.Count * 2 <= length)
{
Vector128<byte> reverseMask = Vector128.Create((byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
nuint numElements = (nuint)Vector128<byte>.Count;
nuint numIters = (length / numElements) / 2;
for (nuint i = 0; i < numIters; i++)
Expand All @@ -2369,8 +2368,10 @@ public static void Reverse(ref byte buf, nuint length)
// +---------------------------------------------------------------+
// | P | O | N | M | L | K | J | I | H | G | F | E | D | C | B | A |
// +---------------------------------------------------------------+
tempFirst = Ssse3.Shuffle(tempFirst, reverseMask);
tempLast = Ssse3.Shuffle(tempLast, reverseMask);
tempFirst = Vector128.Shuffle(tempFirst, Vector128.Create(
(byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
tempLast = Vector128.Shuffle(tempLast, Vector128.Create(
(byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));

// Store the reversed vectors
tempLast.StoreUnsafe(ref buf, firstOffset);
Expand Down
35 changes: 18 additions & 17 deletions src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1986,10 +1986,10 @@ private static int FindFirstMatchedLane(Vector128<ushort> compareResult)

public static void Reverse(ref char buf, nuint length)
{
ref byte bufByte = ref Unsafe.As<char, byte>(ref buf);
nuint byteLength = length * sizeof(char);
if (Avx2.IsSupported && (nuint)Vector256<short>.Count * 2 <= length)
{
ref byte bufByte = ref Unsafe.As<char, byte>(ref buf);
nuint byteLength = length * sizeof(char);
Vector256<byte> reverseMask = Vector256.Create(
(byte)14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, // first 128-bit lane
14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); // second 128-bit lane
Expand Down Expand Up @@ -2028,20 +2028,22 @@ public static void Reverse(ref char buf, nuint length)
}
bufByte = ref Unsafe.Add(ref bufByte, numIters * numElements);
length -= numIters * (nuint)Vector256<short>.Count * 2;
// Store any remaining values one-by-one
buf = ref Unsafe.As<byte, char>(ref bufByte);
}
else if (Ssse3.IsSupported && (nuint)Vector128<short>.Count * 2 <= length)
else if (Vector128.IsHardwareAccelerated && (nuint)Vector128<short>.Count * 2 <= length)
{
Vector128<byte> reverseMask = Vector128.Create((byte)14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
nuint numElements = (nuint)Vector128<byte>.Count;
nuint numIters = ((length * sizeof(char)) / numElements) / 2;
ref short bufShort = ref Unsafe.As<char, short>(ref buf);
nuint numElements = (nuint)Vector128<short>.Count;
nuint numIters = (length / numElements) / 2;
for (nuint i = 0; i < numIters; i++)
{
nuint firstOffset = i * numElements;
nuint lastOffset = byteLength - ((1 + i) * numElements);
nuint lastOffset = length - ((1 + i) * numElements);

// Load in values from beginning and end of the array.
Vector128<byte> tempFirst = Vector128.LoadUnsafe(ref bufByte, firstOffset);
Vector128<byte> tempLast = Vector128.LoadUnsafe(ref bufByte, lastOffset);
Vector128<short> tempFirst = Vector128.LoadUnsafe(ref bufShort, firstOffset);
Vector128<short> tempLast = Vector128.LoadUnsafe(ref bufShort, lastOffset);

// Shuffle to reverse each vector:
// +-------------------------------+
Expand All @@ -2051,19 +2053,18 @@ public static void Reverse(ref char buf, nuint length)
// +-------------------------------+
// | H | G | F | E | D | C | B | A |
// +-------------------------------+
tempFirst = Ssse3.Shuffle(tempFirst, reverseMask);
tempLast = Ssse3.Shuffle(tempLast, reverseMask);
tempFirst = Vector128.Shuffle(tempFirst, Vector128.Create(7, 6, 5, 4, 3, 2, 1, 0));
tempLast = Vector128.Shuffle(tempLast, Vector128.Create(7, 6, 5, 4, 3, 2, 1, 0));

// Store the reversed vectors
tempLast.StoreUnsafe(ref bufByte, firstOffset);
tempFirst.StoreUnsafe(ref bufByte, lastOffset);
tempLast.StoreUnsafe(ref bufShort, firstOffset);
tempFirst.StoreUnsafe(ref bufShort, lastOffset);
}
bufByte = ref Unsafe.Add(ref bufByte, numIters * numElements);
bufShort = ref Unsafe.Add(ref bufShort, numIters * numElements);
length -= numIters * (nuint)Vector128<short>.Count * 2;
// Store any remaining values one-by-one
buf = ref Unsafe.As<short, char>(ref bufShort);
}

// Store any remaining values one-by-one
buf = ref Unsafe.As<byte, char>(ref bufByte);
ReverseInner(ref buf, length);
}
}
Expand Down
32 changes: 15 additions & 17 deletions src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

namespace System
Expand Down Expand Up @@ -441,7 +442,7 @@ public static void Reverse(ref int buf, nuint length)
buf = ref Unsafe.Add(ref buf, numIters * numElements);
length -= numIters * numElements * 2;
}
else if (Sse2.IsSupported && (nuint)Vector128<int>.Count * 2 <= length)
else if (Vector128.IsHardwareAccelerated && (nuint)Vector128<int>.Count * 2 <= length)
{
nuint numElements = (nuint)Vector128<int>.Count;
nuint numIters = (length / numElements) / 2;
Expand All @@ -462,8 +463,8 @@ public static void Reverse(ref int buf, nuint length)
// +---------------+
// | D | C | B | A |
// +---------------+
tempFirst = Sse2.Shuffle(tempFirst, 0b00_01_10_11);
tempLast = Sse2.Shuffle(tempLast, 0b00_01_10_11);
tempFirst = Vector128.Shuffle(tempFirst, Vector128.Create(3, 2, 1, 0));
tempLast = Vector128.Shuffle(tempLast, Vector128.Create(3, 2, 1, 0));

// Store the values into final location
tempLast.StoreUnsafe(ref buf, firstOffset);
Expand Down Expand Up @@ -508,19 +509,17 @@ public static void Reverse(ref long buf, nuint length)
buf = ref Unsafe.Add(ref buf, numIters * numElements);
length -= numIters * numElements * 2;
}
else if (Sse2.IsSupported && (nuint)Vector128<long>.Count * 2 <= length)
else if (Vector128.IsHardwareAccelerated && (nuint)Vector128<long>.Count * 2 <= length)
{
ref int bufInt = ref Unsafe.As<long, int>(ref buf);
nuint intLength = length * (sizeof(long) / sizeof(int));
nuint numElements = (nuint)Vector128<int>.Count;
nuint numIters = (intLength / numElements) / 2;
nuint numElements = (nuint)Vector128<long>.Count;
nuint numIters = (length / numElements) / 2;
for (nuint i = 0; i < numIters; i++)
{
nuint firstOffset = i * numElements;
nuint lastOffset = intLength - ((1 + i) * numElements);
nuint lastOffset = length - ((1 + i) * numElements);
// Load the values into vectors
Vector128<int> tempFirst = Vector128.LoadUnsafe(ref bufInt, firstOffset);
Vector128<int> tempLast = Vector128.LoadUnsafe(ref bufInt, lastOffset);
Vector128<long> tempFirst = Vector128.LoadUnsafe(ref buf, firstOffset);
Vector128<long> tempLast = Vector128.LoadUnsafe(ref buf, lastOffset);

// Shuffle to reverse each vector:
// +-------+
Expand All @@ -530,15 +529,14 @@ public static void Reverse(ref long buf, nuint length)
// +-------+
// | B | A |
// +-------+
tempFirst = Sse2.Shuffle(tempFirst, 0b0100_1110);
tempLast = Sse2.Shuffle(tempLast, 0b0100_1110);
tempFirst = Vector128.Shuffle(tempFirst, Vector128.Create(1, 0));
tempLast = Vector128.Shuffle(tempLast, Vector128.Create(1, 0));

// Store the values into final location
tempLast.StoreUnsafe(ref bufInt, firstOffset);
tempFirst.StoreUnsafe(ref bufInt, lastOffset);
tempLast.StoreUnsafe(ref buf, firstOffset);
tempFirst.StoreUnsafe(ref buf, lastOffset);
}
bufInt = ref Unsafe.Add(ref bufInt, numIters * numElements);
buf = ref Unsafe.As<int, long>(ref bufInt);
buf = ref Unsafe.Add(ref buf, numIters * numElements);
length -= numIters * (nuint)Vector128<long>.Count * 2;
}

Expand Down
Loading

0 comments on commit f244adb

Please sign in to comment.