Skip to content

Commit

Permalink
Add tests for custom JavaScriptEncoder to cover the virtual code path…
Browse files Browse the repository at this point in the history
…s in TextEncoder, and address previous feedback. (dotnet/corefx#42064)

* Move using directive within ifdef to make it clear when its used.

* Add more tests for custom text encoder case.

* Fix typo in comment gaurd -> guard

* Fix up the using directives that got removed during merge conflict
resolution.

* Address feedback - fix 0x7F case, rename vectors to be self-documenting.


Commit migrated from dotnet/corefx@62e6ccf
  • Loading branch information
ahsonkhan authored Oct 30, 2019
1 parent 70e8131 commit 1510fdb
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 100 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,9 @@ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf
// Load the next 16 bytes.
Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);

Vector128<sbyte> mask = Sse2Helper.CreateAsciiMask(sourceValue);
int index = Sse2.MoveMask(mask);
// Check for ASCII text. Any byte that's not in the ASCII range will already be negative when
// casted to signed byte.
int index = Sse2.MoveMask(sourceValue);

if (index != 0)
{
Expand Down Expand Up @@ -196,6 +197,7 @@ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf
idx += utf8BytesConsumedForScalar;
}
}
Debug.Assert(idx == utf8Text.Length);

idx = -1; // All bytes are allowed.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,17 @@ public static Vector128<short> CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder
{
Debug.Assert(Sse2.IsSupported);

// Space ' ', anything in the control characters range, and anything above short.MaxValue but less than or equal char.MaxValue
Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x20);
// Anything in the control characters range, and anything above short.MaxValue but less than or equal char.MaxValue
// That's because anything between 32768 and 65535 (inclusive) will overflow and become negative.
Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_spaceMaskInt16);

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x22)); // Quotation Mark '"'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x5C)); // Reverse Solidus '\'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_quotationMarkMaskInt16));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_reverseSolidusMaskInt16));

// Anything above the ASCII range, and also including the leftover control character in the ASCII range - 0x7F
// When this method is called with only ASCII data, 0x7F is the only value that would meet this comparison.
// However, when called from "Default", the source could contain characters outside the ASCII range.
mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_tildeMaskInt16));

return mask;
}
Expand All @@ -31,10 +37,16 @@ public static Vector128<sbyte> CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder
{
Debug.Assert(Sse2.IsSupported);

Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x20); // Control characters, and anything above 0x7E since sbyte.MaxValue is 0x7E
// Anything in the control characters range (except 0x7F), and anything above sbyte.MaxValue but less than or equal byte.MaxValue
// That's because anything between 128 and 255 (inclusive) will overflow and become negative.
Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_spaceMaskSByte);

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_quotationMarkMaskSByte));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_reverseSolidusMaskSByte));

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x22)); // Quotation Mark "
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x5C)); // Reverse Solidus \
// Leftover control character in the ASCII range - 0x7F
// Since we are dealing with sbytes, 0x7F is the only value that would meet this comparison.
mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_tildeMaskSByte));

return mask;
}
Expand All @@ -46,14 +58,12 @@ public static Vector128<short> CreateEscapingMask_DefaultJavaScriptEncoderBasicL

Vector128<short> mask = CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x26)); // Ampersand '&'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x27)); // Apostrophe '''
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x2B)); // Plus sign '+'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3C)); // Less Than Sign '<'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x3E)); // Greater Than Sign '>'
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_UInt16_0x60)); // Grave Access '`'

mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Tilde '~', anything above the ASCII range
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_ampersandMaskInt16));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_apostropheMaskInt16));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_plusSignMaskInt16));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_lessThanSignMaskInt16));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_greaterThanSignMaskInt16));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_graveAccentMaskInt16));

return mask;
}
Expand All @@ -65,12 +75,12 @@ public static Vector128<sbyte> CreateEscapingMask_DefaultJavaScriptEncoderBasicL

Vector128<sbyte> mask = CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);

mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x26)); // Ampersand &
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x27)); // Apostrophe '
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x2B)); // Plus sign +
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3C)); // Less Than Sign <
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x3E)); // Greater Than Sign >
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_mask_SByte_0x60)); // Grave Access `
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_ampersandMaskSByte));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_apostropheMaskSByte));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_plusSignMaskSByte));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_lessThanSignMaskSByte));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_greaterThanSignMaskSByte));
mask = Sse2.Or(mask, Sse2.CompareEqual(sourceValue, s_graveAccentMaskSByte));

return mask;
}
Expand All @@ -80,48 +90,38 @@ public static Vector128<short> CreateAsciiMask(Vector128<short> sourceValue)
{
Debug.Assert(Sse2.IsSupported);

Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_mask_UInt16_0x00); // Null, anything above short.MaxValue but less than or equal char.MaxValue
mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_mask_UInt16_0x7E)); // Tilde '~', anything above the ASCII range
// Anything above short.MaxValue but less than or equal char.MaxValue
// That's because anything between 32768 and 65535 (inclusive) will overflow and become negative.
Vector128<short> mask = Sse2.CompareLessThan(sourceValue, s_nullMaskInt16);

return mask;
}
// Anything above the ASCII range
mask = Sse2.Or(mask, Sse2.CompareGreaterThan(sourceValue, s_maxAsciiCharacterMaskInt16));

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> CreateAsciiMask(Vector128<sbyte> sourceValue)
{
Debug.Assert(Sse2.IsSupported);

// Null, anything above sbyte.MaxValue but less than or equal byte.MaxValue (i.e. anything above the ASCII range)
Vector128<sbyte> mask = Sse2.CompareLessThan(sourceValue, s_mask_SByte_0x00);
return mask;
}

private static readonly Vector128<short> s_mask_UInt16_0x00 = Vector128<short>.Zero; // Null

private static readonly Vector128<short> s_mask_UInt16_0x20 = Vector128.Create((short)0x20); // Space ' '

private static readonly Vector128<short> s_mask_UInt16_0x22 = Vector128.Create((short)0x22); // Quotation Mark '"'
private static readonly Vector128<short> s_mask_UInt16_0x26 = Vector128.Create((short)0x26); // Ampersand '&'
private static readonly Vector128<short> s_mask_UInt16_0x27 = Vector128.Create((short)0x27); // Apostrophe '''
private static readonly Vector128<short> s_mask_UInt16_0x2B = Vector128.Create((short)0x2B); // Plus sign '+'
private static readonly Vector128<short> s_mask_UInt16_0x3C = Vector128.Create((short)0x3C); // Less Than Sign '<'
private static readonly Vector128<short> s_mask_UInt16_0x3E = Vector128.Create((short)0x3E); // Greater Than Sign '>'
private static readonly Vector128<short> s_mask_UInt16_0x5C = Vector128.Create((short)0x5C); // Reverse Solidus '\'
private static readonly Vector128<short> s_mask_UInt16_0x60 = Vector128.Create((short)0x60); // Grave Access '`'

private static readonly Vector128<short> s_mask_UInt16_0x7E = Vector128.Create((short)0x7E); // Tilde '~'

private static readonly Vector128<sbyte> s_mask_SByte_0x00 = Vector128<sbyte>.Zero; // Null

private static readonly Vector128<sbyte> s_mask_SByte_0x20 = Vector128.Create((sbyte)0x20); // Space ' '

private static readonly Vector128<sbyte> s_mask_SByte_0x22 = Vector128.Create((sbyte)0x22); // Quotation Mark '"'
private static readonly Vector128<sbyte> s_mask_SByte_0x26 = Vector128.Create((sbyte)0x26); // Ampersand '&'
private static readonly Vector128<sbyte> s_mask_SByte_0x27 = Vector128.Create((sbyte)0x27); // Apostrophe '''
private static readonly Vector128<sbyte> s_mask_SByte_0x2B = Vector128.Create((sbyte)0x2B); // Plus sign '+'
private static readonly Vector128<sbyte> s_mask_SByte_0x3C = Vector128.Create((sbyte)0x3C); // Less Than Sign '<'
private static readonly Vector128<sbyte> s_mask_SByte_0x3E = Vector128.Create((sbyte)0x3E); // Greater Than Sign '>'
private static readonly Vector128<sbyte> s_mask_SByte_0x5C = Vector128.Create((sbyte)0x5C); // Reverse Solidus '\'
private static readonly Vector128<sbyte> s_mask_SByte_0x60 = Vector128.Create((sbyte)0x60); // Grave Access '`'
private static readonly Vector128<short> s_nullMaskInt16 = Vector128<short>.Zero;
private static readonly Vector128<short> s_spaceMaskInt16 = Vector128.Create((short)' ');
private static readonly Vector128<short> s_quotationMarkMaskInt16 = Vector128.Create((short)'"');
private static readonly Vector128<short> s_ampersandMaskInt16 = Vector128.Create((short)'&');
private static readonly Vector128<short> s_apostropheMaskInt16 = Vector128.Create((short)'\'');
private static readonly Vector128<short> s_plusSignMaskInt16 = Vector128.Create((short)'+');
private static readonly Vector128<short> s_lessThanSignMaskInt16 = Vector128.Create((short)'<');
private static readonly Vector128<short> s_greaterThanSignMaskInt16 = Vector128.Create((short)'>');
private static readonly Vector128<short> s_reverseSolidusMaskInt16 = Vector128.Create((short)'\\');
private static readonly Vector128<short> s_graveAccentMaskInt16 = Vector128.Create((short)'`');
private static readonly Vector128<short> s_tildeMaskInt16 = Vector128.Create((short)'~');
private static readonly Vector128<short> s_maxAsciiCharacterMaskInt16 = Vector128.Create((short)0x7F); // Delete control character

private static readonly Vector128<sbyte> s_spaceMaskSByte = Vector128.Create((sbyte)' ');
private static readonly Vector128<sbyte> s_quotationMarkMaskSByte = Vector128.Create((sbyte)'"');
private static readonly Vector128<sbyte> s_ampersandMaskSByte = Vector128.Create((sbyte)'&');
private static readonly Vector128<sbyte> s_apostropheMaskSByte = Vector128.Create((sbyte)'\'');
private static readonly Vector128<sbyte> s_plusSignMaskSByte = Vector128.Create((sbyte)'+');
private static readonly Vector128<sbyte> s_lessThanSignMaskSByte = Vector128.Create((sbyte)'<');
private static readonly Vector128<sbyte> s_greaterThanSignMaskSByte = Vector128.Create((sbyte)'>');
private static readonly Vector128<sbyte> s_reverseSolidusMaskSByte = Vector128.Create((sbyte)'\\');
private static readonly Vector128<sbyte> s_graveAccentMaskSByte = Vector128.Create((sbyte)'`');
private static readonly Vector128<sbyte> s_tildeMaskSByte = Vector128.Create((sbyte)'~');
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -714,8 +714,9 @@ public virtual unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf8
// Load the next 16 bytes.
Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);

Vector128<sbyte> mask = Sse2Helper.CreateAsciiMask(sourceValue);
int index = Sse2.MoveMask(mask);
// Check for ASCII text. Any byte that's not in the ASCII range will already be negative when
// casted to signed byte.
int index = Sse2.MoveMask(sourceValue);

if (index != 0)
{
Expand Down Expand Up @@ -812,6 +813,7 @@ public virtual unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf8
idx += utf8BytesConsumedForScalar;
}
}
Debug.Assert(idx == utf8Text.Length);

idx = -1; // All bytes are allowed.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,9 @@ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf
// Load the next 16 bytes.
Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);

Vector128<sbyte> mask = Sse2Helper.CreateAsciiMask(sourceValue);
int index = Sse2.MoveMask(mask);
// Check for ASCII text. Any byte that's not in the ASCII range will already be negative when
// casted to signed byte.
int index = Sse2.MoveMask(sourceValue);

if (index != 0)
{
Expand Down Expand Up @@ -194,7 +195,7 @@ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf
else
{
// Check if any of the 16 bytes need to be escaped.
mask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);
Vector128<sbyte> mask = Sse2Helper.CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);

index = Sse2.MoveMask(mask);
// If index == 0, that means none of the 16 bytes needed to be escaped.
Expand Down Expand Up @@ -245,6 +246,7 @@ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf
idx += utf8BytesConsumedForScalar;
}
}
Debug.Assert(idx == utf8Text.Length);

idx = -1; // All bytes are allowed.

Expand Down
Loading

0 comments on commit 1510fdb

Please sign in to comment.