From d759695c07cb934fc605be67e4f4492a9dd4fba3 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Mon, 17 Jan 2022 16:23:52 +0100 Subject: [PATCH] move to jetscii --- Cargo.toml | 6 +++--- README.md | 2 +- src/reader.rs | 23 ++++++----------------- 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a70ea39..82caefd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,17 +21,17 @@ glob = "0.3.0" libtest-mimic = "0.3.0" [features] -# By default this crate depends on the memchr library for best performance. +# By default this crate depends on the jetscii library for best performance. # Disabling this feature will leave you with 100% safe Rust and no dependencies. # This may come in handy if you encounter packaging/build problems. -default = ["memchr"] +default = ["jetscii"] # If this feature is enabled, html5gum will expose private APIs and start # printing debug information to stdout. integration-tests = [] [dependencies] -memchr = { version = "2.4.1", optional = true } +jetscii = { version = "0.5.1", optional = true } [[bench]] name = "data_state" diff --git a/README.md b/README.md index 66645c2..5f49337 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ This allows you to: ## Other features * No unsafe Rust -* Only dependency is `memchr`, and can be disabled via crate features (see `Cargo.toml`) +* Only dependency is `jetscii`, and can be disabled via crate features (see `Cargo.toml`) ## Alternative HTML parsers diff --git a/src/reader.rs b/src/reader.rs index b24be64..6c79de8 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -41,7 +41,8 @@ pub trait Reader { /// /// The default implementation simply reads one character and calls `read_cb` with that /// character, ignoring the needle entirely. It is recommended to manually implement - /// `read_until` if there is any sort of in-memory buffer where `memchr` can be run on. 
+ /// `read_until` if there is any sort of in-memory buffer where some sort of efficient string + /// search (see `memchr` or `jetscii` crate) can be run on. /// /// The return value is usually borrowed from underlying buffers. If that's not possible, a /// small buffer is provided as `char_buf` to put a single character into. @@ -384,20 +385,8 @@ impl<'a> Readable<'a> for File { #[inline] fn fast_find(needle: &[u8], haystack: &[u8]) -> Option<usize> { - #[cfg(feature = "memchr")] - if needle.iter().all(u8::is_ascii) { - if needle.len() == 3 { - return memchr::memchr3(needle[0], needle[1], needle[2], haystack); - } else if needle.len() == 2 { - return memchr::memchr2(needle[0], needle[1], haystack); - } else if needle.len() == 1 { - return memchr::memchr(needle[0], haystack); - } - } - - let (i, _) = haystack - .iter() - .enumerate() - .find(|(_, &b)| needle.contains(&b))?; - Some(i) + debug_assert!(needle.len() <= 16); + let mut needle_arr = [0; 16]; + needle_arr[..needle.len()].copy_from_slice(needle); + jetscii::Bytes::new(needle_arr, needle.len() as i32, |b| needle.contains(&b)).find(haystack) }