Skip to content

Commit

Permalink
Clean up Latin1Encoding implementation and vectorize its logic (dotne…
Browse files Browse the repository at this point in the history
…t#32994)

* Clean up Latin1Encoding implementation
- Vectorizes Latin1 narrowing / widening code paths
- Re-plumbs Latin1Encoding to use refactored Encoding workhorses
- Removes unused EncodingNLS type
- Removes unused DecoderBestFitFallback type
- Uses "? replacement" behavior for all Encoding subclassed types by default, except Latin1Encoding which still uses best-fit
- Also includes perf improvements for vectorized ASCII transcoding logic
  • Loading branch information
GrabYourPitchforks authored Jun 11, 2020
1 parent 6084542 commit 775bbf0
Show file tree
Hide file tree
Showing 24 changed files with 2,791 additions and 1,413 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -874,25 +874,27 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Text\ASCIIUtility.Helpers.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\CodePageDataItem.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Decoder.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\DecoderBestFitFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\DecoderExceptionFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\DecoderFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\DecoderNLS.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\DecoderReplacementFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoder.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderBestFitFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderExceptionFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderLatin1BestFitFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderLatin1BestFitFallback.Data.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderNLS.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderReplacementFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoding.Internal.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingData.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingInfo.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingNLS.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingProvider.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingTable.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Latin1Encoding.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Latin1Encoding.Sealed.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Latin1Utility.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Latin1Utility.Helpers.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\NormalizationForm.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Rune.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\SpanRuneEnumerator.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -663,8 +663,8 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
// jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII
// data, we jump out of the hot paths to targets at the end of the method.

Vector128<ushort> asciiMaskForPTEST = Vector128.Create((ushort)0xFF80); // used for PTEST on supported hardware
Vector128<ushort> asciiMaskForPADDUSW = Vector128.Create((ushort)0x7F80); // used for PADDUSW
Vector128<ushort> asciiMaskForTestZ = Vector128.Create((ushort)0xFF80); // used for PTEST on supported hardware
Vector128<ushort> asciiMaskForAddSaturate = Vector128.Create((ushort)0x7F80); // used for PADDUSW
const uint NonAsciiDataSeenMask = 0b_1010_1010_1010_1010; // used for determining whether 'currentMask' contains non-ASCII data

#if SYSTEM_PRIVATE_CORELIB
Expand All @@ -679,7 +679,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
// has value >= 0x0080 (non-ASCII). Then we'll treat the vector as a BYTE vector in order
// to extract the mask. Reminder: the 0x0080 bit of each WORD should be ignored.

currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForPADDUSW).AsByte());
currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());

if ((currentMask & NonAsciiDataSeenMask) != 0)
{
Expand Down Expand Up @@ -733,15 +733,15 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
{
// If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data.
// Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data.
if (!Sse41.TestZ(combinedVector, asciiMaskForPTEST))
if (!Sse41.TestZ(combinedVector, asciiMaskForTestZ))
{
goto FoundNonAsciiDataInFirstOrSecondVector;
}
}
else
{
// See comment earlier in the method for an explanation of how the below logic works.
currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(combinedVector, asciiMaskForPADDUSW).AsByte());
currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(combinedVector, asciiMaskForAddSaturate).AsByte());
if ((currentMask & NonAsciiDataSeenMask) != 0)
{
goto FoundNonAsciiDataInFirstOrSecondVector;
Expand Down Expand Up @@ -776,15 +776,15 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
{
// If a non-ASCII bit is set in any WORD of the first vector, we have seen non-ASCII data.
// Jump to the non-ASCII handler, which recomputes the mask from that vector.
if (!Sse41.TestZ(firstVector, asciiMaskForPTEST))
if (!Sse41.TestZ(firstVector, asciiMaskForTestZ))
{
goto FoundNonAsciiDataInFirstVector;
}
}
else
{
// See comment earlier in the method for an explanation of how the below logic works.
currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForPADDUSW).AsByte());
currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
if ((currentMask & NonAsciiDataSeenMask) != 0)
{
goto FoundNonAsciiDataInCurrentMask;
Expand All @@ -809,15 +809,15 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
{
// If a non-ASCII bit is set in any WORD of the first vector, we have seen non-ASCII data.
// Jump to the non-ASCII handler, which recomputes the mask from that vector.
if (!Sse41.TestZ(firstVector, asciiMaskForPTEST))
if (!Sse41.TestZ(firstVector, asciiMaskForTestZ))
{
goto FoundNonAsciiDataInFirstVector;
}
}
else
{
// See comment earlier in the method for an explanation of how the below logic works.
currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForPADDUSW).AsByte());
currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
if ((currentMask & NonAsciiDataSeenMask) != 0)
{
goto FoundNonAsciiDataInCurrentMask;
Expand All @@ -841,14 +841,14 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
// See comment earlier in the method for an explanation of how the below logic works.
if (Sse41.IsSupported)
{
if (!Sse41.TestZ(firstVector, asciiMaskForPTEST))
if (!Sse41.TestZ(firstVector, asciiMaskForTestZ))
{
goto FoundNonAsciiDataInFirstVector;
}
}
else
{
currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForPADDUSW).AsByte());
currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
if ((currentMask & NonAsciiDataSeenMask) != 0)
{
goto FoundNonAsciiDataInCurrentMask;
Expand All @@ -863,7 +863,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
FoundNonAsciiDataInFirstVector:

// See comment earlier in the method for an explanation of how the below logic works.
currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForPADDUSW).AsByte());
currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());

FoundNonAsciiDataInCurrentMask:

Expand Down Expand Up @@ -1302,8 +1302,8 @@ private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pA
Debug.Assert(BitConverter.IsLittleEndian);
Debug.Assert(elementCount >= 2 * SizeOfVector128);

Vector128<short> asciiMaskForPTEST = Vector128.Create(unchecked((short)0xFF80)); // used for PTEST on supported hardware
Vector128<ushort> asciiMaskForPADDUSW = Vector128.Create((ushort)0x7F80); // used for PADDUSW
Vector128<short> asciiMaskForTestZ = Vector128.Create(unchecked((short)0xFF80)); // used for PTEST on supported hardware
Vector128<ushort> asciiMaskForAddSaturate = Vector128.Create((ushort)0x7F80); // used for PADDUSW
const int NonAsciiDataSeenMask = 0b_1010_1010_1010_1010; // used for determining whether the pmovmskb operation saw non-ASCII chars

// First, perform an unaligned read of the first part of the input buffer.
Expand All @@ -1315,14 +1315,14 @@ private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pA

if (Sse41.IsSupported)
{
if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST))
if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForTestZ))
{
return 0;
}
}
else
{
if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForPADDUSW).AsByte()) & NonAsciiDataSeenMask) != 0)
if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForAddSaturate).AsByte()) & NonAsciiDataSeenMask) != 0)
{
return 0;
}
Expand Down Expand Up @@ -1355,14 +1355,14 @@ private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pA
// See comments earlier in this method for information about how this works.
if (Sse41.IsSupported)
{
if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST))
if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForTestZ))
{
goto Finish;
}
}
else
{
if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForPADDUSW).AsByte()) & NonAsciiDataSeenMask) != 0)
if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForAddSaturate).AsByte()) & NonAsciiDataSeenMask) != 0)
{
goto Finish;
}
Expand Down Expand Up @@ -1394,20 +1394,20 @@ private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pA
// See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works.
if (Sse41.IsSupported)
{
if (!Sse41.TestZ(combinedVector, asciiMaskForPTEST))
if (!Sse41.TestZ(combinedVector, asciiMaskForTestZ))
{
goto FoundNonAsciiDataInLoop;
}
}
else
{
if ((Sse2.MoveMask(Sse2.AddSaturate(combinedVector.AsUInt16(), asciiMaskForPADDUSW).AsByte()) & NonAsciiDataSeenMask) != 0)
if ((Sse2.MoveMask(Sse2.AddSaturate(combinedVector.AsUInt16(), asciiMaskForAddSaturate).AsByte()) & NonAsciiDataSeenMask) != 0)
{
goto FoundNonAsciiDataInLoop;
}
}

// Build up the UTF-8 vector and perform the store.
// Build up the ASCII vector and perform the store.

asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorSecond);

Expand All @@ -1428,14 +1428,14 @@ private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pA
// See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works.
if (Sse41.IsSupported)
{
if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST))
if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForTestZ))
{
goto Finish; // found non-ASCII data
}
}
else
{
if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForPADDUSW).AsByte()) & NonAsciiDataSeenMask) != 0)
if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForAddSaturate).AsByte()) & NonAsciiDataSeenMask) != 0)
{
goto Finish; // found non-ASCII data
}
Expand Down Expand Up @@ -1637,6 +1637,12 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt

nuint finalOffsetWhereCanRunLoop = elementCount - SizeOfVector128;

// Calculating the destination address outside the loop results in significant
// perf wins vs. relying on the JIT to fold memory addressing logic into the
// write instructions. See: https://github.com/dotnet/runtime/issues/33002

char* pCurrentWriteAddress = pUtf16Buffer + currentOffset;

do
{
// In a loop, perform an unaligned read, widen to two vectors, then aligned write the two vectors.
Expand All @@ -1650,13 +1656,14 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt
goto NonAsciiDataSeenInInnerLoop;
}

byte* pStore = (byte*)(pUtf16Buffer + currentOffset);
Sse2.StoreAligned(pStore, Sse2.UnpackLow(asciiVector, zeroVector));
Vector128<byte> low = Sse2.UnpackLow(asciiVector, zeroVector);
Sse2.StoreAligned((byte*)pCurrentWriteAddress, low);

pStore += SizeOfVector128;
Sse2.StoreAligned(pStore, Sse2.UnpackHigh(asciiVector, zeroVector));
Vector128<byte> high = Sse2.UnpackHigh(asciiVector, zeroVector);
Sse2.StoreAligned((byte*)pCurrentWriteAddress + SizeOfVector128, high);

currentOffset += SizeOfVector128;
pCurrentWriteAddress += SizeOfVector128;
} while (currentOffset <= finalOffsetWhereCanRunLoop);

Finish:
Expand Down
Loading

0 comments on commit 775bbf0

Please sign in to comment.