diff --git a/src/libraries/Common/src/System/HexConverter.cs b/src/libraries/Common/src/System/HexConverter.cs index 7bae713b87cd2..84005ac9398f0 100644 --- a/src/libraries/Common/src/System/HexConverter.cs +++ b/src/libraries/Common/src/System/HexConverter.cs @@ -6,6 +6,7 @@ #if SYSTEM_PRIVATE_CORELIB using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; #endif @@ -89,11 +90,8 @@ public static void ToCharsBuffer(byte value, Span buffer, int startingInde } #if SYSTEM_PRIVATE_CORELIB - private static void EncodeToUtf16_Ssse3(ReadOnlySpan bytes, Span chars, Casing casing) + private static void EncodeToUtf16_Vector128(ReadOnlySpan bytes, Span chars, Casing casing) { - Debug.Assert(bytes.Length >= 4); - nint pos = 0; - Vector128 shuffleMask = Vector128.Create( 0xFF, 0xFF, 0, 0xFF, 0xFF, 0xFF, 1, 0xFF, 0xFF, 0xFF, 2, 0xFF, 0xFF, 0xFF, 3, 0xFF); @@ -108,40 +106,70 @@ private static void EncodeToUtf16_Ssse3(ReadOnlySpan bytes, Span cha (byte)'8', (byte)'9', (byte)'a', (byte)'b', (byte)'c', (byte)'d', (byte)'e', (byte)'f'); + nuint pos = 0; + Debug.Assert(bytes.Length >= 4); + + // it's used to ensure we can process the trailing elements in the same SIMD loop (with possible overlap) + // but we won't double compute for any evenly divisible by 4 length since we + // compare pos > lengthSubVector128 rather than pos >= lengthSubVector128 + nuint lengthSubVector128 = (nuint)bytes.Length - (nuint)Vector128.Count; + ref byte destRef = ref Unsafe.As(ref MemoryMarshal.GetReference(chars)); do { // Read 32bits from "bytes" span at "pos" offset uint block = Unsafe.ReadUnaligned( ref Unsafe.Add(ref MemoryMarshal.GetReference(bytes), pos)); + // TODO: Remove once cross-platform Shuffle is landed + // https://github.com/dotnet/runtime/issues/63331 + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128 Shuffle(Vector128 value, Vector128 mask) + { + if (Ssse3.IsSupported) + { + 
return Ssse3.Shuffle(value, mask); + } + else if (!AdvSimd.Arm64.IsSupported) + { + ThrowHelper.ThrowNotSupportedException(); + } + return AdvSimd.Arm64.VectorTableLookup(value, mask); + } + // Calculate nibbles - Vector128 lowNibbles = Ssse3.Shuffle( + Vector128 lowNibbles = Shuffle( Vector128.CreateScalarUnsafe(block).AsByte(), shuffleMask); - Vector128 highNibbles = Sse2.ShiftRightLogical( - Sse2.ShiftRightLogical128BitLane(lowNibbles, 2).AsInt32(), 4).AsByte(); + + // ExtractVector128 is not entirely the same as ShiftRightLogical128BitLane, but it works here since + // first two bytes in lowNibbles are guaranteed to be zeros + Vector128 shifted = Sse2.IsSupported ? + Sse2.ShiftRightLogical128BitLane(lowNibbles, 2) : + AdvSimd.ExtractVector128(lowNibbles, lowNibbles, 2); + + Vector128 highNibbles = Vector128.ShiftRightLogical(shifted.AsInt32(), 4).AsByte(); // Lookup the hex values at the positions of the indices - Vector128 indices = Sse2.And( - Sse2.Or(lowNibbles, highNibbles), Vector128.Create((byte)0xF)); - Vector128 hex = Ssse3.Shuffle(asciiTable, indices); + Vector128 indices = (lowNibbles | highNibbles) & Vector128.Create((byte)0xF); + Vector128 hex = Shuffle(asciiTable, indices); // The high bytes (0x00) of the chars have also been converted // to ascii hex '0', so clear them out. 
- hex = Sse2.And(hex, Vector128.Create((ushort)0xFF).AsByte()); + hex &= Vector128.Create((ushort)0xFF).AsByte(); + hex.StoreUnsafe(ref destRef, pos * 4); // destRef is byte-addressed: each source byte becomes two UTF-16 hex chars, i.e. 4 destination bytes + pos += (nuint)Vector128.Count; - // Save to "chars" at pos*2 offset - Unsafe.WriteUnaligned( - ref Unsafe.As( - ref Unsafe.Add(ref MemoryMarshal.GetReference(chars), pos * 2)), hex); + if (pos == (nuint)bytes.Length) + { + return; + } - pos += 4; - } while (pos < bytes.Length - 3); + // Overlap with the current chunk for trailing elements + if (pos > lengthSubVector128) + { + pos = lengthSubVector128; + } - // Process trailing elements (bytes.Length % 4) - for (; pos < bytes.Length; pos++) - { - ToCharsBuffer(Unsafe.Add(ref MemoryMarshal.GetReference(bytes), pos), chars, (int)pos * 2, casing); - } + } while (true); } #endif @@ -150,9 +178,9 @@ public static void EncodeToUtf16(ReadOnlySpan bytes, Span chars, Cas Debug.Assert(chars.Length >= bytes.Length * 2); #if SYSTEM_PRIVATE_CORELIB - if (Ssse3.IsSupported && bytes.Length >= 4) + if ((AdvSimd.Arm64.IsSupported || Ssse3.IsSupported) && bytes.Length >= 4) { - EncodeToUtf16_Ssse3(bytes, chars, casing); + EncodeToUtf16_Vector128(bytes, chars, casing); return; } #endif