Skip to content

Commit

Permalink
Improve Ascii.FromUtf16 and FromHexString (dotnet#102735)
Browse files Browse the repository at this point in the history
* Use cheaper Vector.Narrow alternatives in Ascii

* Use existing Ascii helper in HexConverter
  • Loading branch information
MihaZupan authored May 29, 2024
1 parent 0fe2e9a commit 5c74f63
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 10 deletions.
3 changes: 2 additions & 1 deletion src/libraries/Common/src/System/HexConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
using System.Text;
using System.Text.Unicode;
#endif

Expand Down Expand Up @@ -261,7 +262,7 @@ public static bool TryDecodeFromUtf16_Vector128(ReadOnlySpan<char> chars, Span<b
// single UTF8 ASCII vector - the implementation can be shared with UTF8 paths.
Vector128<ushort> vec1 = Vector128.LoadUnsafe(ref srcRef, offset);
Vector128<ushort> vec2 = Vector128.LoadUnsafe(ref srcRef, offset + (nuint)Vector128<ushort>.Count);
Vector128<byte> vec = Vector128.Narrow(vec1, vec2);
Vector128<byte> vec = Ascii.ExtractAsciiVector(vec1, vec2);

// Based on "Algorithm #3" https://github.com/WojciechMula/toys/blob/master/simd-parse-hex/geoff_algorithm.cpp
// by Geoff Langdale and Wojciech Mula
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.Wasm;
using System.Runtime.Intrinsics.X86;

namespace System.Text
Expand Down Expand Up @@ -1651,7 +1652,7 @@ private static bool AllCharsInVectorAreAscii<T>(Vector512<T> vector)
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> ExtractAsciiVector(Vector128<ushort> vectorFirst, Vector128<ushort> vectorSecond)
internal static Vector128<byte> ExtractAsciiVector(Vector128<ushort> vectorFirst, Vector128<ushort> vectorSecond)
{
// Narrows two vectors of words [ w7 w6 w5 w4 w3 w2 w1 w0 ] and [ w7' w6' w5' w4' w3' w2' w1' w0' ]
// to a vector of bytes [ b7 ... b0 b7' ... b0'].
Expand All @@ -1665,12 +1666,32 @@ private static Vector128<byte> ExtractAsciiVector(Vector128<ushort> vectorFirst,
{
return AdvSimd.Arm64.UnzipEven(vectorFirst.AsByte(), vectorSecond.AsByte());
}
else if (PackedSimd.IsSupported)
{
return PackedSimd.ConvertNarrowingSaturateUnsigned(vectorFirst.AsInt16(), vectorSecond.AsInt16());
}
else
{
return Vector128.Narrow(vectorFirst, vectorSecond);
}
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector256<byte> ExtractAsciiVector(Vector256<ushort> vectorFirst, Vector256<ushort> vectorSecond)
{
    // Combines two 256-bit vectors of 16-bit elements into a single 256-bit vector of bytes.
    // Without AVX2 the portable Vector256.Narrow helper is used.
    if (!Avx2.IsSupported)
    {
        return Vector256.Narrow(vectorFirst, vectorSecond);
    }

    // With AVX2, pack with unsigned saturation and then run the result through the shared fix-up helper.
    // NOTE(review): presumably the fix-up restores element order after the pack instruction - confirm
    // against PackedSpanHelpers.FixUpPackedVector256Result.
    Vector256<byte> packed = Avx2.PackUnsignedSaturate(vectorFirst.AsInt16(), vectorSecond.AsInt16());
    return PackedSpanHelpers.FixUpPackedVector256Result(packed);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector512<byte> ExtractAsciiVector(Vector512<ushort> vectorFirst, Vector512<ushort> vectorSecond)
{
    // Combines two 512-bit vectors of 16-bit elements into a single 512-bit vector of bytes.
    // Without AVX-512BW the portable Vector512.Narrow helper is used.
    if (!Avx512BW.IsSupported)
    {
        return Vector512.Narrow(vectorFirst, vectorSecond);
    }

    // With AVX-512BW, pack with unsigned saturation and then run the result through the shared fix-up helper.
    // NOTE(review): presumably the fix-up restores element order after the pack instruction - confirm
    // against PackedSpanHelpers.FixUpPackedVector512Result.
    Vector512<byte> packed = Avx512BW.PackUnsignedSaturate(vectorFirst.AsInt16(), vectorSecond.AsInt16());
    return PackedSpanHelpers.FixUpPackedVector512Result(packed);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount)
{
Expand Down Expand Up @@ -1821,7 +1842,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified_256(char* pUtf16Buff
// Turn the 16 ASCII chars we just read into 16 ASCII bytes, then copy it to the destination.

ref byte asciiBuffer = ref *pAsciiBuffer;
Vector256<byte> asciiVector = Vector256.Narrow(utf16VectorFirst, utf16VectorFirst);
Vector256<byte> asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst);
asciiVector.GetLower().StoreUnsafe(ref asciiBuffer, 0);
nuint currentOffsetInElements = Vector256.Size / 2; // we processed 16 elements so far

Expand All @@ -1847,7 +1868,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified_256(char* pUtf16Buff
}

// Turn the 16 ASCII chars we just read into 16 ASCII bytes, then copy it to the destination.
asciiVector = Vector256.Narrow(utf16VectorFirst, utf16VectorFirst);
asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst);
asciiVector.GetLower().StoreUnsafe(ref asciiBuffer, currentOffsetInElements);
}

Expand Down Expand Up @@ -1877,7 +1898,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified_256(char* pUtf16Buff
// Build up the ASCII vector and perform the store.

Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % Vector256.Size == 0, "Write should be aligned.");
asciiVector = Vector256.Narrow(utf16VectorFirst, utf16VectorSecond);
asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorSecond);
asciiVector.StoreUnsafe(ref asciiBuffer, currentOffsetInElements);

currentOffsetInElements += Vector256.Size;
Expand All @@ -1900,7 +1921,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified_256(char* pUtf16Buff
// First part was all ASCII, narrow and aligned write. Note we're only filling in the low half of the vector.

Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % Vector128.Size == 0, "Destination should be 128-bit-aligned.");
asciiVector = Vector256.Narrow(utf16VectorFirst, utf16VectorFirst);
asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst);
asciiVector.GetLower().StoreUnsafe(ref asciiBuffer, currentOffsetInElements);
currentOffsetInElements += Vector256.Size / 2;

Expand Down Expand Up @@ -1938,7 +1959,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified_512(char* pUtf16Buff
// Turn the 32 ASCII chars we just read into 32 ASCII bytes, then copy it to the destination.

ref byte asciiBuffer = ref *pAsciiBuffer;
Vector512<byte> asciiVector = Vector512.Narrow(utf16VectorFirst, utf16VectorFirst);
Vector512<byte> asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst);
asciiVector.GetLower().StoreUnsafe(ref asciiBuffer, 0); // how to store the lower part of a avx512
nuint currentOffsetInElements = Vector512.Size / 2; // we processed 32 elements so far

Expand All @@ -1965,7 +1986,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified_512(char* pUtf16Buff
}

// Turn the 32 ASCII chars we just read into 32 ASCII bytes, then copy it to the destination.
asciiVector = Vector512.Narrow(utf16VectorFirst, utf16VectorFirst);
asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst);
asciiVector.GetLower().StoreUnsafe(ref asciiBuffer, currentOffsetInElements);
}

Expand Down Expand Up @@ -1995,7 +2016,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified_512(char* pUtf16Buff
// Build up the ASCII vector and perform the store.

Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % Vector512.Size == 0, "Write should be aligned.");
asciiVector = Vector512.Narrow(utf16VectorFirst, utf16VectorSecond);
asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorSecond);
asciiVector.StoreUnsafe(ref asciiBuffer, currentOffsetInElements);

currentOffsetInElements += Vector512.Size;
Expand All @@ -2018,7 +2039,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified_512(char* pUtf16Buff
// First part was all ASCII, narrow and aligned write. Note we're only filling in the low half of the vector.

Debug.Assert(((nuint)pAsciiBuffer + currentOffsetInElements) % Vector256.Size == 0, "Destination should be 256-bit-aligned.");
asciiVector = Vector512.Narrow(utf16VectorFirst, utf16VectorFirst);
asciiVector = ExtractAsciiVector(utf16VectorFirst, utf16VectorFirst);
asciiVector.GetLower().StoreUnsafe(ref asciiBuffer, currentOffsetInElements);
currentOffsetInElements += Vector512.Size / 2;

Expand Down

0 comments on commit 5c74f63

Please sign in to comment.