Skip to content

Commit

Permalink
hermes/utf: export a couple more functions
Browse files Browse the repository at this point in the history
Summary:
Export a couple of minor UTF-8 related functions from the hermes/utf
module. Technically they may better belong in the support crate because
they are not Hermes-specific, but I didn't want to create a whole new
module for two trivial functions, plus the customer is a customer of the
hermes_parser module as well.

Reviewed By: avp

Differential Revision: D31044168

fbshipit-source-id: 099080ba2869c9df65e8f6713878b2895f950373
  • Loading branch information
tmikov authored and facebook-github-bot committed Sep 23, 2021
1 parent a4fbdda commit 13136bb
Showing 1 changed file with 19 additions and 5 deletions.
24 changes: 19 additions & 5 deletions unsupported/juno/hermes/src/utf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,25 @@ const UTF16_HIGH_SURROGATE: u32 = 0xD800;
const UTF16_LOW_SURROGATE: u32 = 0xDC00;
const UNICODE_REPLACEMENT_CHARACTER: u32 = char::REPLACEMENT_CHARACTER as u32;

/// Return whether the byte has its high bit (0x80) set, meaning it is part of
/// a multi-byte UTF-8 sequence rather than a single-byte ASCII character.
/// Leading and continuation bytes both satisfy this test.
#[inline]
fn is_utf8_start(ch: u8) -> bool {
pub fn is_utf8(ch: u8) -> bool {
(ch & 0x80) != 0
}

/// Tell whether `ch` has the bit pattern `11xxxxxx`, which marks the first
/// byte of a multi-byte UTF-8 sequence.
#[inline]
pub fn is_utf8_lead(ch: u8) -> bool {
    // For an 8-bit value, having both top bits set is the same as being at
    // least 0b1100_0000.
    ch >= 0xC0
}

/// Tell whether `ch` has the bit pattern `10xxxxxx`, i.e. it is a UTF-8
/// continuation byte: one that sits after the leading byte, in the "middle"
/// of a multi-byte codepoint.
#[inline]
pub fn is_utf8_continuation(ch: u8) -> bool {
    // 0b1000_0000 through 0b1011_1111 are exactly the bytes with top bits `10`.
    matches!(ch, 0x80..=0xBF)
}

/// Returns whether cp is a high surrogate.
#[inline]
fn is_high_surrogate(cp: u32) -> bool {
Expand Down Expand Up @@ -91,7 +105,7 @@ fn decode_utf8<const ALLOW_SURROGATES: bool>(
from: &mut usize,
ch: u8,
) -> Result<u32, UTFError> {
if !is_utf8_start(ch) {
if !is_utf8(ch) {
*from += 1;
Ok(ch as u32)
} else {
Expand All @@ -105,7 +119,7 @@ fn decode_utf8_slow_path<const ALLOW_SURROGATES: bool>(
from: &mut usize,
ch: u32,
) -> Result<u32, UTFError> {
debug_assert!(is_utf8_start(ch as u8));
debug_assert!(is_utf8(ch as u8));
let result: u32;
let len = src.len();
if (ch & 0xE0) == 0xC0 {
Expand Down Expand Up @@ -194,7 +208,7 @@ pub fn utf8_with_surrogates_to_utf16(src: &[u8]) -> Result<Vec<u16>, UTFError> {
while from < len {
// We checked `from` already.
let b = unsafe { *src.get_unchecked(from) };
if !is_utf8_start(b) {
if !is_utf8(b) {
from += 1;
v.push(b as u16);
continue;
Expand Down Expand Up @@ -223,7 +237,7 @@ fn utf8_with_surrogates_to_string_helper(src: &[u8]) -> (String, Option<UTFError
while from < len {
// We checked `from` already.
let b = unsafe { *src.get_unchecked(from) };
if !is_utf8_start(b) {
if !is_utf8(b) {
from += 1;
str.push(b as char);
continue;
Expand Down

0 comments on commit 13136bb

Please sign in to comment.