Clean up Latin1Encoding implementation and vectorize its logic (dotnet#32994)

* Clean up Latin1Encoding implementation
- Vectorizes Latin1 narrowing / widening code paths
- Re-plumbs Latin1Encoding to use refactored Encoding workhorses
- Removes unused EncodingNLS type
- Removes unused DecoderBestFitFallback type
- Uses "? replacement" behavior for all Encoding subclassed types by default, except Latin1Encoding which still uses best-fit
- Also includes perf improvements for vectorized ASCII transcoding logic
elinor-fung · Jun 11, 2020 · 775bbf0 · 775bbf0
1 parent 6084542
commit 775bbf0
Show file tree

Hide file tree

Showing 24 changed files with 2,791 additions and 1,413 deletions.
diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
@@ -874,25 +874,27 @@
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\ASCIIUtility.Helpers.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\CodePageDataItem.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\Decoder.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\Text\DecoderBestFitFallback.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\DecoderExceptionFallback.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\DecoderFallback.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\DecoderNLS.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\DecoderReplacementFallback.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoder.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderBestFitFallback.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderExceptionFallback.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderFallback.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderLatin1BestFitFallback.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderLatin1BestFitFallback.Data.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderNLS.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderReplacementFallback.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoding.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoding.Internal.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingData.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingInfo.cs" />
-    <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingNLS.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingProvider.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingTable.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\Latin1Encoding.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Text\Latin1Encoding.Sealed.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Text\Latin1Utility.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Text\Latin1Utility.Helpers.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\NormalizationForm.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\Rune.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Text\SpanRuneEnumerator.cs" />

diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs
@@ -663,8 +663,8 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
             // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII
             // data, we jump out of the hot paths to targets at the end of the method.
 
-            Vector128<ushort> asciiMaskForPTEST = Vector128.Create((ushort)0xFF80); // used for PTEST on supported hardware
-            Vector128<ushort> asciiMaskForPADDUSW = Vector128.Create((ushort)0x7F80); // used for PADDUSW
+            Vector128<ushort> asciiMaskForTestZ = Vector128.Create((ushort)0xFF80); // used for PTEST on supported hardware
+            Vector128<ushort> asciiMaskForAddSaturate = Vector128.Create((ushort)0x7F80); // used for PADDUSW
             const uint NonAsciiDataSeenMask = 0b_1010_1010_1010_1010; // used for determining whether 'currentMask' contains non-ASCII data
 
 #if SYSTEM_PRIVATE_CORELIB
@@ -679,7 +679,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
             // has value >= 0x0080 (non-ASCII). Then we'll treat the vector as a BYTE vector in order
             // to extract the mask. Reminder: the 0x0080 bit of each WORD should be ignored.
 
-            currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForPADDUSW).AsByte());
+            currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
 
             if ((currentMask & NonAsciiDataSeenMask) != 0)
             {
@@ -733,15 +733,15 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
                     {
                         // If a non-ASCII bit is set in any WORD of the combined vector, we have seen non-ASCII data.
                         // Jump to the non-ASCII handler to figure out which particular vector contained non-ASCII data.
-                        if (!Sse41.TestZ(combinedVector, asciiMaskForPTEST))
+                        if (!Sse41.TestZ(combinedVector, asciiMaskForTestZ))
                         {
                             goto FoundNonAsciiDataInFirstOrSecondVector;
                         }
                     }
                     else
                     {
                         // See comment earlier in the method for an explanation of how the below logic works.
-                        currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(combinedVector, asciiMaskForPADDUSW).AsByte());
+                        currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(combinedVector, asciiMaskForAddSaturate).AsByte());
                         if ((currentMask & NonAsciiDataSeenMask) != 0)
                         {
                             goto FoundNonAsciiDataInFirstOrSecondVector;
@@ -776,15 +776,15 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
             {
                 // If a non-ASCII bit is set in any WORD of the first vector, we have seen non-ASCII data.
                 // Jump to the non-ASCII handler for the first vector.
-                if (!Sse41.TestZ(firstVector, asciiMaskForPTEST))
+                if (!Sse41.TestZ(firstVector, asciiMaskForTestZ))
                 {
                     goto FoundNonAsciiDataInFirstVector;
                 }
             }
             else
             {
                 // See comment earlier in the method for an explanation of how the below logic works.
-                currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForPADDUSW).AsByte());
+                currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
                 if ((currentMask & NonAsciiDataSeenMask) != 0)
                 {
                     goto FoundNonAsciiDataInCurrentMask;
@@ -809,15 +809,15 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
                 {
                     // If a non-ASCII bit is set in any WORD of the first vector, we have seen non-ASCII data.
                     // Jump to the non-ASCII handler for the first vector.
-                    if (!Sse41.TestZ(firstVector, asciiMaskForPTEST))
+                    if (!Sse41.TestZ(firstVector, asciiMaskForTestZ))
                     {
                         goto FoundNonAsciiDataInFirstVector;
                     }
                 }
                 else
                 {
                     // See comment earlier in the method for an explanation of how the below logic works.
-                    currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForPADDUSW).AsByte());
+                    currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
                     if ((currentMask & NonAsciiDataSeenMask) != 0)
                     {
                         goto FoundNonAsciiDataInCurrentMask;
@@ -841,14 +841,14 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
             // See comment earlier in the method for an explanation of how the below logic works.
             if (Sse41.IsSupported)
             {
-                if (!Sse41.TestZ(firstVector, asciiMaskForPTEST))
+                if (!Sse41.TestZ(firstVector, asciiMaskForTestZ))
                 {
                     goto FoundNonAsciiDataInFirstVector;
                 }
             }
             else
             {
-                currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForPADDUSW).AsByte());
+                currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
                 if ((currentMask & NonAsciiDataSeenMask) != 0)
                 {
                     goto FoundNonAsciiDataInCurrentMask;
@@ -863,7 +863,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiChar_Sse2(char* pBuffer, nuin
         FoundNonAsciiDataInFirstVector:
 
             // See comment earlier in the method for an explanation of how the below logic works.
-            currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForPADDUSW).AsByte());
+            currentMask = (uint)Sse2.MoveMask(Sse2.AddSaturate(firstVector, asciiMaskForAddSaturate).AsByte());
 
         FoundNonAsciiDataInCurrentMask:
 
@@ -1302,8 +1302,8 @@ private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pA
             Debug.Assert(BitConverter.IsLittleEndian);
             Debug.Assert(elementCount >= 2 * SizeOfVector128);
 
-            Vector128<short> asciiMaskForPTEST = Vector128.Create(unchecked((short)0xFF80)); // used for PTEST on supported hardware
-            Vector128<ushort> asciiMaskForPADDUSW = Vector128.Create((ushort)0x7F80); // used for PADDUSW
+            Vector128<short> asciiMaskForTestZ = Vector128.Create(unchecked((short)0xFF80)); // used for PTEST on supported hardware
+            Vector128<ushort> asciiMaskForAddSaturate = Vector128.Create((ushort)0x7F80); // used for PADDUSW
             const int NonAsciiDataSeenMask = 0b_1010_1010_1010_1010; // used for determining whether the pmovmskb operation saw non-ASCII chars
 
             // First, perform an unaligned read of the first part of the input buffer.
@@ -1315,14 +1315,14 @@ private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pA
 
             if (Sse41.IsSupported)
             {
-                if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST))
+                if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForTestZ))
                 {
                     return 0;
                 }
             }
             else
             {
-                if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForPADDUSW).AsByte()) & NonAsciiDataSeenMask) != 0)
+                if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForAddSaturate).AsByte()) & NonAsciiDataSeenMask) != 0)
                 {
                     return 0;
                 }
@@ -1355,14 +1355,14 @@ private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pA
                 // See comments earlier in this method for information about how this works.
                 if (Sse41.IsSupported)
                 {
-                    if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST))
+                    if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForTestZ))
                     {
                         goto Finish;
                     }
                 }
                 else
                 {
-                    if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForPADDUSW).AsByte()) & NonAsciiDataSeenMask) != 0)
+                    if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForAddSaturate).AsByte()) & NonAsciiDataSeenMask) != 0)
                     {
                         goto Finish;
                     }
@@ -1394,20 +1394,20 @@ private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pA
                 // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works.
                 if (Sse41.IsSupported)
                 {
-                    if (!Sse41.TestZ(combinedVector, asciiMaskForPTEST))
+                    if (!Sse41.TestZ(combinedVector, asciiMaskForTestZ))
                     {
                         goto FoundNonAsciiDataInLoop;
                     }
                 }
                 else
                 {
-                    if ((Sse2.MoveMask(Sse2.AddSaturate(combinedVector.AsUInt16(), asciiMaskForPADDUSW).AsByte()) & NonAsciiDataSeenMask) != 0)
+                    if ((Sse2.MoveMask(Sse2.AddSaturate(combinedVector.AsUInt16(), asciiMaskForAddSaturate).AsByte()) & NonAsciiDataSeenMask) != 0)
                     {
                         goto FoundNonAsciiDataInLoop;
                     }
                 }
 
-                // Build up the UTF-8 vector and perform the store.
+                // Build up the ASCII vector and perform the store.
 
                 asciiVector = Sse2.PackUnsignedSaturate(utf16VectorFirst, utf16VectorSecond);
 
@@ -1428,14 +1428,14 @@ private static unsafe nuint NarrowUtf16ToAscii_Sse2(char* pUtf16Buffer, byte* pA
             // See comments in GetIndexOfFirstNonAsciiChar_Sse2 for information about how this works.
             if (Sse41.IsSupported)
             {
-                if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForPTEST))
+                if (!Sse41.TestZ(utf16VectorFirst, asciiMaskForTestZ))
                 {
                     goto Finish; // found non-ASCII data
                 }
             }
             else
             {
-                if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForPADDUSW).AsByte()) & NonAsciiDataSeenMask) != 0)
+                if ((Sse2.MoveMask(Sse2.AddSaturate(utf16VectorFirst.AsUInt16(), asciiMaskForAddSaturate).AsByte()) & NonAsciiDataSeenMask) != 0)
                 {
                     goto Finish; // found non-ASCII data
                 }
@@ -1637,6 +1637,12 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt
 
             nuint finalOffsetWhereCanRunLoop = elementCount - SizeOfVector128;
 
+            // Calculating the destination address outside the loop results in significant
+            // perf wins vs. relying on the JIT to fold memory addressing logic into the
+            // write instructions. See: https://github.com/dotnet/runtime/issues/33002
+
+            char* pCurrentWriteAddress = pUtf16Buffer + currentOffset;
+
             do
             {
                 // In a loop, perform an unaligned read, widen to two vectors, then aligned write the two vectors.
@@ -1650,13 +1656,14 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt
                     goto NonAsciiDataSeenInInnerLoop;
                 }
 
-                byte* pStore = (byte*)(pUtf16Buffer + currentOffset);
-                Sse2.StoreAligned(pStore, Sse2.UnpackLow(asciiVector, zeroVector));
+                Vector128<byte> low = Sse2.UnpackLow(asciiVector, zeroVector);
+                Sse2.StoreAligned((byte*)pCurrentWriteAddress, low);
 
-                pStore += SizeOfVector128;
-                Sse2.StoreAligned(pStore, Sse2.UnpackHigh(asciiVector, zeroVector));
+                Vector128<byte> high = Sse2.UnpackHigh(asciiVector, zeroVector);
+                Sse2.StoreAligned((byte*)pCurrentWriteAddress + SizeOfVector128, high);
 
                 currentOffset += SizeOfVector128;
+                pCurrentWriteAddress += SizeOfVector128;
             } while (currentOffset <= finalOffsetWhereCanRunLoop);
 
         Finish: