Skip to content

Commit

Permalink
Improve perf of Utf8Parser.TryParse(out [u]long, default) (dotnet#52423)
Browse files Browse the repository at this point in the history
  • Loading branch information
GrabYourPitchforks authored Jul 12, 2021
1 parent 1509b1a commit f787f38
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 122 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -336,107 +336,121 @@ private static bool TryParseInt32D(ReadOnlySpan<byte> source, out int value, out

/// <summary>
/// Parses a signed 64-bit integer written in decimal from the start of <paramref name="source"/>.
/// Accepted shape: an optional single '+' or '-' byte, followed by one or more ASCII digit bytes.
/// Parsing stops at the first non-digit byte after the leading digit, or at the end of the span.
/// </summary>
/// <param name="source">The bytes to parse. Digits are recognized as the byte values '0' through '9'.</param>
/// <param name="value">On success, the parsed value; on failure, <c>default</c>.</param>
/// <param name="bytesConsumed">
/// On success, the number of bytes read (sign byte, if any, plus all digits); on failure, 0.
/// </param>
/// <returns>
/// <see langword="true"/> if a value was parsed; <see langword="false"/> if the span is empty, does not
/// start with a digit or a sign immediately followed by a digit, or if the overflow check rejects the
/// digit sequence.
/// </returns>
private static bool TryParseInt64D(ReadOnlySpan<byte> source, out long value, out int bytesConsumed)
{
    long sign = 0; // 0 if the value is positive, -1 if the value is negative
    int idx = 0;

    // We use 'nuint' for the firstChar and nextChar data types in this method because
    // it gives us a free early zero-extension to 64 bits when running on a 64-bit platform.

    nuint firstChar;
    while (true)
    {
        if ((uint)idx >= (uint)source.Length) { goto FalseExit; }
        firstChar = (uint)source[idx] - '0';
        if ((uint)firstChar <= 9) { break; }

        // We saw something that wasn't a digit. If it's a '+' or a '-',
        // we'll set the 'sign' value appropriately and resume the "read
        // first char" loop from the next index. If this loops more than
        // once (idx != 0), it means we saw a sign character followed by
        // a non-digit character, which should be considered an error.

        if (idx != 0)
        {
            goto FalseExit;
        }

        idx++;

        if ((uint)firstChar == unchecked((uint)('-' - '0')))
        {
            sign--; // set to -1
        }
        else if ((uint)firstChar != unchecked((uint)('+' - '0')))
        {
            goto FalseExit; // not a digit, not '-', and not '+'; fail
        }
    }

    ulong parsedValue = firstChar;
    int overflowLength = ParserHelpers.Int64OverflowLength + idx; // +idx to account for any sign char we read
    idx++;

    // At this point, we successfully read a single digit character.
    // The only failure condition from here on out is integer overflow.

    if (source.Length < overflowLength)
    {
        // If the input span is short enough such that integer overflow isn't an issue,
        // don't bother performing overflow checks. Just keep shifting in new digits
        // until we see a non-digit character or until we've exhausted our input buffer.
        // NOTE(review): this relies on ParserHelpers.Int64OverflowLength (declared elsewhere)
        // being the digit count at which overflow first becomes possible - confirm.

        while (true)
        {
            if ((uint)idx >= (uint)source.Length) { break; } // EOF
            nuint nextChar = (uint)source[idx] - '0';
            if ((uint)nextChar > 9) { break; } // not a digit
            parsedValue = parsedValue * 10 + nextChar;
            idx++;
        }
    }
    else
    {
        while (true)
        {
            if ((uint)idx >= (uint)source.Length) { break; } // EOF
            nuint nextChar = (uint)source[idx] - '0';
            if ((uint)nextChar > 9) { break; } // not a digit
            idx++;

            // The const below is the smallest unsigned x for which "x * 10 + 9"
            // might overflow long.MaxValue. If the current accumulator is below
            // this const, there's no risk of overflowing.

            const ulong OverflowRisk = 0x0CCC_CCCC_CCCC_CCCCul;

            if (parsedValue < OverflowRisk)
            {
                parsedValue = parsedValue * 10 + nextChar;
                continue;
            }

            // If the current accumulator is exactly equal to the const above,
            // then "accumulator * 10 + 7" is the highest we can go without overflowing
            // long.MaxValue. (If we know the value is negative, we can instead allow
            // +8, since the range of negative numbers is one higher than the range of
            // positive numbers.) This also implies that if the current accumulator
            // is higher than the const above, there's no hope that we'll succeed,
            // so we may as well just fail now.
            //
            // The (nextChar + sign) trick below works because sign is 0 or -1,
            // so if sign is -1 then this actually checks that nextChar > 8.
            // n.b. signed arithmetic below because nextChar may be 0.

            if (parsedValue != OverflowRisk || (int)nextChar + (int)sign > 7)
            {
                goto FalseExit;
            }

            parsedValue = OverflowRisk * 10 + nextChar;
        }
    }

    // 'sign' is 0 for non-negative and -1 for negative. This allows us to perform
    // cheap arithmetic + bitwise operations to mimic a multiplication by 1 or -1
    // without incurring the cost of an actual multiplication operation.
    //
    // If sign = 0, this becomes value = (parsedValue ^ 0) - 0 = parsedValue
    // If sign = -1, this becomes value = (parsedValue ^ -1) - (-1) = ~parsedValue + 1 = -parsedValue

    bytesConsumed = idx;
    value = ((long)parsedValue ^ sign) - sign;
    return true;

FalseExit:
    // Single shared failure path: report nothing consumed and a default value.
    bytesConsumed = 0;
    value = default;
    return false;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -277,77 +277,83 @@ private static bool TryParseUInt32D(ReadOnlySpan<byte> source, out uint value, o

/// <summary>
/// Parses an unsigned 64-bit integer written in decimal from the start of <paramref name="source"/>.
/// The first byte must be an ASCII digit (no sign byte is accepted); parsing then continues until the
/// first non-digit byte or the end of the span.
/// </summary>
/// <param name="source">The bytes to parse. Digits are recognized as the byte values '0' through '9'.</param>
/// <param name="value">On success, the parsed value; on failure, <c>default</c>.</param>
/// <param name="bytesConsumed">On success, the number of digit bytes read; on failure, 0.</param>
/// <returns>
/// <see langword="true"/> if a value was parsed; <see langword="false"/> if the span is empty, does not
/// start with a digit, or if the overflow check rejects the digit sequence.
/// </returns>
private static bool TryParseUInt64D(ReadOnlySpan<byte> source, out ulong value, out int bytesConsumed)
{
    if (source.IsEmpty)
    {
        goto FalseExit;
    }

    // We use 'nuint' for the firstDigit and nextChar data types in this method because
    // it gives us a free early zero-extension to 64 bits when running on a 64-bit platform.
    //
    // Parse the first digit separately. If invalid here, we need to return false.

    nuint firstDigit = (uint)source[0] - '0';
    if ((uint)firstDigit > 9) { goto FalseExit; }
    ulong parsedValue = firstDigit;

    // At this point, we successfully read a single digit character.
    // The only failure condition from here on out is integer overflow.

    int idx = 1;
    if (source.Length < ParserHelpers.UInt64OverflowLength)
    {
        // If the input span is short enough such that integer overflow isn't an issue,
        // don't bother performing overflow checks. Just keep shifting in new digits
        // until we see a non-digit character or until we've exhausted our input buffer.
        // NOTE(review): this relies on ParserHelpers.UInt64OverflowLength (declared elsewhere)
        // being the digit count at which overflow first becomes possible - confirm.

        while (true)
        {
            if ((uint)idx >= (uint)source.Length) { break; } // EOF
            nuint nextChar = (uint)source[idx] - '0';
            if ((uint)nextChar > 9) { break; } // not a digit
            parsedValue = parsedValue * 10 + nextChar;
            idx++;
        }
    }
    else
    {
        while (true)
        {
            if ((uint)idx >= (uint)source.Length) { break; } // EOF
            nuint nextChar = (uint)source[idx] - '0';
            if ((uint)nextChar > 9) { break; } // not a digit
            idx++;

            // The const below is the smallest unsigned x for which "x * 10 + 9"
            // might overflow ulong.MaxValue. If the current accumulator is below
            // this const, there's no risk of overflowing.

            const ulong OverflowRisk = 0x1999_9999_9999_9999ul;

            if (parsedValue < OverflowRisk)
            {
                parsedValue = parsedValue * 10 + nextChar;
                continue;
            }

            // If the current accumulator is exactly equal to the const above,
            // then "accumulator * 10 + 5" is the highest we can go without overflowing
            // ulong.MaxValue. This also implies that if the current accumulator
            // is higher than the const above, there's no hope that we'll succeed,
            // so we may as well just fail now.

            if (parsedValue != OverflowRisk || (uint)nextChar > 5)
            {
                goto FalseExit;
            }

            parsedValue = OverflowRisk * 10 + nextChar;
        }
    }

    bytesConsumed = idx;
    value = parsedValue;
    return true;

FalseExit:
    // Single shared failure path: report nothing consumed and a default value.
    bytesConsumed = 0;
    value = default;
    return false;
}
}
}

0 comments on commit f787f38

Please sign in to comment.