Skip to content

Commit

Permalink
perf(codegen): check last char with byte methods (oxc-project#6509)
Browse files Browse the repository at this point in the history
When checking what last char in buffer is, avoid calculating a `char` when only comparing to an ASCII char (byte) anyway.
  • Loading branch information
overlookmotel committed Oct 14, 2024
1 parent 18b68ff commit 77f3a1a
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 22 deletions.
84 changes: 75 additions & 9 deletions crates/oxc_codegen/src/code_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,17 +125,55 @@ impl CodeBuffer {
/// let mut code = CodeBuffer::new();
/// code.print_str("foo");
///
/// assert_eq!(code.peek_nth_back(0), Some('o'));
/// assert_eq!(code.peek_nth_back(2), Some('f'));
/// assert_eq!(code.peek_nth_back(3), None);
/// assert_eq!(code.peek_nth_char_back(0), Some('o'));
/// assert_eq!(code.peek_nth_char_back(2), Some('f'));
/// assert_eq!(code.peek_nth_char_back(3), None);
/// ```
#[inline]
#[must_use = "Peeking is pointless if the peeked char isn't used"]
pub fn peek_nth_back(&self, n: usize) -> Option<char> {
pub fn peek_nth_char_back(&self, n: usize) -> Option<char> {
// SAFETY: All methods of `CodeBuffer` ensure `buf` is valid UTF-8
unsafe { std::str::from_utf8_unchecked(&self.buf) }.chars().nth_back(n)
}

/// Peek the `n`th byte from the end of the buffer.
///
/// When `n` is zero, the last byte is returned.
/// Returns [`None`] if `n` exceeds the length of the buffer.
///
/// # Example
/// ```
/// use oxc_codegen::CodeBuffer;
/// let mut code = CodeBuffer::new();
/// code.print_str("foo");
///
/// assert_eq!(code.peek_nth_byte_back(0), Some(b'o'));
/// assert_eq!(code.peek_nth_byte_back(2), Some(b'f'));
/// assert_eq!(code.peek_nth_byte_back(3), None);
/// ```
#[inline]
#[must_use = "Peeking is pointless if the peeked char isn't used"]
pub fn peek_nth_byte_back(&self, n: usize) -> Option<u8> {
let len = self.len();
if n < len {
Some(self.buf[len - 1 - n])
} else {
None
}
}

/// Peek the last byte from the end of the buffer.
#[inline]
pub fn last_byte(&self) -> Option<u8> {
self.buf.last().copied()
}

/// Peek the last char from the end of the buffer.
#[inline]
pub fn last_char(&self) -> Option<char> {
self.peek_nth_char_back(0)
}

/// Push a single ASCII byte into the buffer.
///
/// # Panics
Expand Down Expand Up @@ -460,12 +498,40 @@ mod test {
}

#[test]
fn peek_nth_back() {
fn peek_nth_char_back() {
let mut code = CodeBuffer::new();
code.print_str("bar");

assert_eq!(code.peek_nth_char_back(0), Some('r'));
assert_eq!(code.peek_nth_char_back(1), Some('a'));
assert_eq!(code.peek_nth_char_back(2), Some('b'));
assert_eq!(code.peek_nth_char_back(3), None);
}

#[test]
fn peek_nth_byte_back() {
let mut code = CodeBuffer::new();
code.print_str("foo");
code.print_str("bar");

assert_eq!(code.peek_nth_back(0), Some('o'));
assert_eq!(code.peek_nth_back(2), Some('f'));
assert_eq!(code.peek_nth_back(3), None);
assert_eq!(code.peek_nth_byte_back(0), Some(b'r'));
assert_eq!(code.peek_nth_byte_back(1), Some(b'a'));
assert_eq!(code.peek_nth_byte_back(2), Some(b'b'));
assert_eq!(code.peek_nth_byte_back(3), None);
}

#[test]
fn last_byte() {
let mut code = CodeBuffer::new();
assert_eq!(code.last_byte(), None);
code.print_str("bar");
assert_eq!(code.last_byte(), Some(b'r'));
}

#[test]
fn last_char() {
let mut code = CodeBuffer::new();
assert_eq!(code.last_char(), None);
code.print_str("bar");
assert_eq!(code.last_char(), Some('r'));
}
}
2 changes: 1 addition & 1 deletion crates/oxc_codegen/src/comment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ impl<'a> Codegen<'a> {

if comments.first().is_some_and(|c| c.preceded_by_newline) {
// Skip printing newline if this comment is already on a newline.
if self.peek_nth_back(0).is_some_and(|c| c != '\n' && c != '\t') {
if self.last_byte().is_some_and(|b| b != b'\n' && b != b'\t') {
self.print_hard_newline();
self.print_indent();
}
Expand Down
6 changes: 3 additions & 3 deletions crates/oxc_codegen/src/gen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1194,11 +1194,11 @@ impl<'a> Gen for BigIntLiteral<'a> {
impl<'a> Gen for RegExpLiteral<'a> {
fn gen(&self, p: &mut Codegen, _ctx: Context) {
p.add_source_mapping(self.span.start);
let last = p.peek_nth_back(0);
let last = p.last_byte();
let pattern_text = self.regex.pattern.source_text(p.source_text);
// Avoid forming a single-line comment or "</script" sequence
if Some('/') == last
|| (Some('<') == last && pattern_text.cow_to_lowercase().starts_with("script"))
if last == Some(b'/')
|| (last == Some(b'<') && pattern_text.cow_to_lowercase().starts_with("script"))
{
p.print_hard_space();
}
Expand Down
36 changes: 27 additions & 9 deletions crates/oxc_codegen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use oxc_ast::ast::{
use oxc_mangler::Mangler;
use oxc_span::{GetSpan, Span};
use oxc_syntax::{
identifier::is_identifier_part,
identifier::{is_identifier_part, is_identifier_part_ascii},
operator::{BinaryOperator, UnaryOperator, UpdateOperator},
precedence::Precedence,
};
Expand Down Expand Up @@ -292,17 +292,31 @@ impl<'a> Codegen<'a> {

#[inline]
fn print_space_before_identifier(&mut self) {
if self
.peek_nth_back(0)
.is_some_and(|ch| is_identifier_part(ch) || self.prev_reg_exp_end == self.code.len())
{
self.print_hard_space();
let Some(byte) = self.last_byte() else { return };

if self.prev_reg_exp_end != self.code.len() {
let is_identifier = if byte.is_ascii() {
// Fast path for ASCII (very common case)
is_identifier_part_ascii(byte as char)
} else {
is_identifier_part(self.last_char().unwrap())
};
if !is_identifier {
return;
}
}

self.print_hard_space();
}

#[inline]
fn last_byte(&self) -> Option<u8> {
self.code.last_byte()
}

#[inline]
fn peek_nth_back(&self, n: usize) -> Option<char> {
self.code.peek_nth_back(n)
fn last_char(&self) -> Option<char> {
self.code.last_char()
}

#[inline]
Expand Down Expand Up @@ -533,7 +547,11 @@ impl<'a> Codegen<'a> {
|| ((prev == bin_op_sub || prev == un_op_neg)
&& (next == bin_op_sub || next == un_op_neg || next == un_op_pre_dec))
|| (prev == un_op_post_dec && next == bin_op_gt)
|| (prev == un_op_not && next == un_op_pre_dec && self.peek_nth_back(1) == Some('<'))
|| (prev == un_op_not
&& next == un_op_pre_dec
// `prev == UnaryOperator::LogicalNot` which means last byte is ASCII,
// and therefore previous character is 1 byte from end of buffer
&& self.code.peek_nth_byte_back(1) == Some(b'<'))
{
self.print_hard_space();
}
Expand Down

0 comments on commit 77f3a1a

Please sign in to comment.