Skip to content

Commit

Permalink
Bug 1478170 - Implement SourceUnits::peekCodePoint for UTF-8. r=arai
Browse files Browse the repository at this point in the history
--HG--
extra : rebase_source : 7d256802f430ba813588fe9535630f67a30fbd56
  • Loading branch information
jswalden committed Jul 25, 2018
1 parent c80f0a2 commit 83ae856
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 0 deletions.
22 changes: 22 additions & 0 deletions js/src/frontend/TokenStream.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@
#include "mozilla/Attributes.h"
#include "mozilla/Casting.h"
#include "mozilla/DebugOnly.h"
#include "mozilla/Maybe.h"
#include "mozilla/MemoryChecking.h"
#include "mozilla/PodOperations.h"
#include "mozilla/TextUtils.h"
Expand Down Expand Up @@ -1089,6 +1090,27 @@ PeekCodePoint(const char16_t* const ptr, const char16_t* const end)
return PeekedCodePoint<char16_t>(c, len);
}

// Peek at the code point beginning at |ptr| in UTF-8 source text.  Yields
// "none" when |ptr| is at or past |end|, or when
// mozilla::DecodeOneUtf8CodePoint produces no code point.  Otherwise yields
// the code point together with the number of code units it spans.
inline PeekedCodePoint<mozilla::Utf8Unit>
PeekCodePoint(const mozilla::Utf8Unit* const ptr, const mozilla::Utf8Unit* const end)
{
    using Peeked = PeekedCodePoint<mozilla::Utf8Unit>;

    // Nothing left to look at.
    if (MOZ_UNLIKELY(ptr >= end))
        return Peeked::none();

    // Fast path: an ASCII unit is reported as a code point of length one.
    const mozilla::Utf8Unit first = *ptr;
    if (mozilla::IsAscii(first))
        return Peeked(first.toUint8(), 1);

    // Non-ASCII lead unit: hand the decoder a cursor positioned just past the
    // lead.  The cursor is passed by address, and the length reported below is
    // measured from |ptr| to wherever the cursor ends up.
    const mozilla::Utf8Unit* cursor = ptr + 1;
    const mozilla::Maybe<char32_t> decoded =
        mozilla::DecodeOneUtf8CodePoint(first, &cursor, end);
    if (decoded.isNothing())
        return Peeked::none();

    const auto unitCount =
        mozilla::AssertedCast<uint8_t>(mozilla::PointerRangeSize(ptr, cursor));
    MOZ_ASSERT(unitCount <= 4);

    return Peeked(decoded.value(), unitCount);
}

// This is the low-level interface to the JS source code buffer. It just gets
// raw Unicode code units: currently 16-bit char16_t units of source text, which
// are not (always) full code points, and soon 8-bit units of UTF-8 source text.
Expand Down
7 changes: 7 additions & 0 deletions mfbt/Utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,13 @@ union Utf8Unit
// that other code.
};

/**
 * Returns true iff |aUnit| is an ASCII value.  The check is delegated to the
 * IsAscii overload that accepts the result of |aUnit.toUint8()|.
 */
inline bool
IsAscii(Utf8Unit aUnit)
{
    const auto unitValue = aUnit.toUint8();
    return IsAscii(unitValue);
}

/**
* Returns true if the given length-delimited memory consists of a valid UTF-8
* string, false otherwise.
Expand Down

0 comments on commit 83ae856

Please sign in to comment.