Skip to content

Commit

Permalink
Remove SSE2-specific logic from UTF-16 validation code (dotnet#52349)
Browse files Browse the repository at this point in the history
If a device does not support SSE41 but does support SIMD hardware acceleration, this method will fall back down generalized Vector-enabled code paths.
  • Loading branch information
GrabYourPitchforks authored May 6, 2021
1 parent 54ce2b9 commit 89df0ef
Showing 2 changed files with 9 additions and 19 deletions.
Original file line number Diff line number Diff line change
@@ -66,7 +66,7 @@ internal static unsafe partial class Utf16Utility
// ARM64-intrinsicified code paths. ARM64 platforms may still use the vectorized
// non-intrinsicified 'else' block below.

if (/* (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || */ Sse2.IsSupported)
if (/* (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || */ Sse41.IsSupported)
{
if (inputLength >= Vector128<ushort>.Count)
{
@@ -96,25 +96,16 @@ internal static unsafe partial class Utf16Utility
pInputBuffer += Vector128<ushort>.Count; // eagerly bump this now in preparation for next loop, will adjust later if necessary
Vector128<ushort> charIsNonAscii;

// Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
// input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)

if (AdvSimd.Arm64.IsSupported)
{
// Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
// input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)
charIsNonAscii = AdvSimd.Min(utf16Data, vector0080);
}
else if (Sse41.IsSupported)
{
// Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
// input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)
charIsNonAscii = Sse41.Min(utf16Data, vector0080);
}
else
{
// Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
// input was 0x0080 <= [value] <= 0x7FFF. The case where 0x8000 <= [value] will
// be handled in a few lines.

charIsNonAscii = Sse2.AndNot(Sse2.CompareGreaterThan(vector0080.AsInt16(), utf16Data.AsInt16()).AsUInt16(), vector0080);
charIsNonAscii = Sse41.Min(utf16Data, vector0080);
}

#if DEBUG
@@ -139,9 +130,8 @@ internal static unsafe partial class Utf16Utility

// Since 3-byte elements have a value >= 0x0800, we'll perform a saturating add of 0x7800 in order to
// get all 3-byte elements to have their 0x8000 bits set. A saturating add will not set the 0x8000
// bit for 1-byte or 2-byte elements. 2-byte and 3-byte elements MAY have their 0x0080 bits set,
// but this is not required, and the 0x0080 bits (see 'charIsNonAscii' above) will be explicitly
// set for such elements anyway by the immediately following OR instruction.
// bit for 1-byte or 2-byte elements. The 0x0080 bit will already have been set for non-ASCII (2-byte
// and 3-byte) elements.

if (AdvSimd.IsSupported)
{
Original file line number Diff line number Diff line change
@@ -165,11 +165,11 @@ private static void GetIndexOfFirstInvalidUtf16Sequence_Test_Core(string unproce
processedInput[0] = '\u0080'; // 2-byte UTF-8 sequence
processedInput[1] = '\u0800'; // 3-byte UTF-8 sequence
processedInput[2] = '\u0080'; // 2-byte UTF-8 sequence
processedInput[3] = '\u0800'; // 3-byte UTF-8 sequence
processedInput[3] = '\u8000'; // 3-byte UTF-8 sequence (a negative number, when signed)
processedInput[4] = '\u0080'; // 2-byte UTF-8 sequence
processedInput[5] = '\u0800'; // 3-byte UTF-8 sequence
processedInput[6] = '\u0080'; // 2-byte UTF-8 sequence
processedInput[7] = '\u0800'; // 3-byte UTF-8 sequence
processedInput[7] = '\u8880'; // 3-byte UTF-8 sequence (a negative number, when signed, with 0x0800 and 0x0080 marker bits also set)

expectedUtf8ByteCount += 12;

0 comments on commit 89df0ef

Please sign in to comment.