hermes/utf: export a couple more functions

Summary: Export a couple of minor UTF-8-related functions from the hermes/utf module. Technically they may better belong in the support crate because they are not Hermes-specific, but I didn't want to create a whole new module for two trivial functions, plus the customer is a customer of the hermes_parser module as well. Reviewed By: avp Differential Revision: D31044168 fbshipit-source-id: 099080ba2869c9df65e8f6713878b2895f950373
meplay · Sep 23, 2021 · 13136bb · 13136bb
1 parent a4fbdda
commit 13136bb
Showing 1 changed file with 19 additions and 5 deletions.
diff --git a/unsupported/juno/hermes/src/utf.rs b/unsupported/juno/hermes/src/utf.rs
@@ -58,11 +58,25 @@ const UTF16_HIGH_SURROGATE: u32 = 0xD800;
 const UTF16_LOW_SURROGATE: u32 = 0xDC00;
 const UNICODE_REPLACEMENT_CHARACTER: u32 = char::REPLACEMENT_CHARACTER as u32;
 
+/// Return whether the byte is part of a multi-byte UTF-8 sequence (i.e. its high bit is set, so it is not ASCII).
 #[inline]
-fn is_utf8_start(ch: u8) -> bool {
+pub fn is_utf8(ch: u8) -> bool {
     (ch & 0x80) != 0
 }
 
+/// Return true if this is the leading byte of a multi-byte UTF-8 sequence (top two bits set).
+#[inline]
+pub fn is_utf8_lead(ch: u8) -> bool {
+    (ch & 0xC0) == 0xC0
+}
+
+/// Return true if this is a UTF-8 continuation byte, or in other words, this
+/// is a byte in the "middle" of a UTF-8 codepoint.
+#[inline]
+pub fn is_utf8_continuation(ch: u8) -> bool {
+    (ch & 0xC0) == 0x80
+}
+
 /// Returns whether cp is a high surrogate.
 #[inline]
 fn is_high_surrogate(cp: u32) -> bool {
@@ -91,7 +105,7 @@ fn decode_utf8<const ALLOW_SURROGATES: bool>(
     from: &mut usize,
     ch: u8,
 ) -> Result<u32, UTFError> {
-    if !is_utf8_start(ch) {
+    if !is_utf8(ch) {
         *from += 1;
         Ok(ch as u32)
     } else {
@@ -105,7 +119,7 @@ fn decode_utf8_slow_path<const ALLOW_SURROGATES: bool>(
     from: &mut usize,
     ch: u32,
 ) -> Result<u32, UTFError> {
-    debug_assert!(is_utf8_start(ch as u8));
+    debug_assert!(is_utf8(ch as u8));
     let result: u32;
     let len = src.len();
     if (ch & 0xE0) == 0xC0 {
@@ -194,7 +208,7 @@ pub fn utf8_with_surrogates_to_utf16(src: &[u8]) -> Result<Vec<u16>, UTFError> {
     while from < len {
         // We checked `from` already.
         let b = unsafe { *src.get_unchecked(from) };
-        if !is_utf8_start(b) {
+        if !is_utf8(b) {
             from += 1;
             v.push(b as u16);
             continue;
@@ -223,7 +237,7 @@ fn utf8_with_surrogates_to_string_helper(src: &[u8]) -> (String, Option<UTFError
     while from < len {
         // We checked `from` already.
         let b = unsafe { *src.get_unchecked(from) };
-        if !is_utf8_start(b) {
+        if !is_utf8(b) {
             from += 1;
             str.push(b as char);
             continue;