Skip to content

Commit 0ead128

Browse files
committed
syntax: make Ast the size of a pointer
This puts every Ast value behind a box to conserve space. It makes things like Vec<Ast> quite a bit smaller than what they would be otherwise, which is especially beneficial for the representation of concatenations and alternations. This doesn't quite solve the memory usage problems though, since an AstKind is still quite big (over 200 bytes). The next step will be boxing each of the variants of an AstKind which should hopefully resolve the issue. Ref #1090
1 parent b8c2066 commit 0ead128

File tree

5 files changed

+332
-272
lines changed

5 files changed

+332
-272
lines changed

regex-syntax/src/ast/mod.rs

+120-60
Original file line numberDiff line numberDiff line change
@@ -429,9 +429,19 @@ pub struct Comment {
429429
///
430430
/// This type defines its own destructor that uses constant stack space and
431431
/// heap space proportional to the size of the `Ast`.
432+
///
433+
/// This type boxes the actual kind of the AST element so that an `Ast` value
434+
/// itself has a very small size. This in turn makes things like `Vec<Ast>` use
435+
/// a lot less memory than it might otherwise, which is particularly beneficial
436+
/// for representing long concatenations or alternations.
437+
#[derive(Clone, Debug, Eq, PartialEq)]
438+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
439+
pub struct Ast(pub Box<AstKind>);
440+
441+
/// The kind of an abstract syntax element.
432442
#[derive(Clone, Debug, Eq, PartialEq)]
433443
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
434-
pub enum Ast {
444+
pub enum AstKind {
435445
/// An empty regex that matches everything.
436446
Empty(Span),
437447
/// A set of flags, e.g., `(?is)`.
@@ -456,44 +466,94 @@ pub enum Ast {
456466
}
457467

458468
impl Ast {
469+
/// Create an "empty" AST item.
470+
pub fn empty(span: Span) -> Ast {
471+
Ast(Box::new(AstKind::Empty(span)))
472+
}
473+
474+
/// Create a "flags" AST item.
475+
pub fn flags(e: SetFlags) -> Ast {
476+
Ast(Box::new(AstKind::Flags(e)))
477+
}
478+
479+
/// Create a "literal" AST item.
480+
pub fn literal(e: Literal) -> Ast {
481+
Ast(Box::new(AstKind::Literal(e)))
482+
}
483+
484+
/// Create a "dot" AST item.
485+
pub fn dot(span: Span) -> Ast {
486+
Ast(Box::new(AstKind::Dot(span)))
487+
}
488+
489+
/// Create an "assertion" AST item.
490+
pub fn assertion(e: Assertion) -> Ast {
491+
Ast(Box::new(AstKind::Assertion(e)))
492+
}
493+
494+
/// Create a "class" AST item.
495+
pub fn class(e: Class) -> Ast {
496+
Ast(Box::new(AstKind::Class(e)))
497+
}
498+
499+
/// Create a "repetition" AST item.
500+
pub fn repetition(e: Repetition) -> Ast {
501+
Ast(Box::new(AstKind::Repetition(e)))
502+
}
503+
504+
/// Create a "group" AST item.
505+
pub fn group(e: Group) -> Ast {
506+
Ast(Box::new(AstKind::Group(e)))
507+
}
508+
509+
/// Create an "alternation" AST item.
510+
pub fn alternation(e: Alternation) -> Ast {
511+
Ast(Box::new(AstKind::Alternation(e)))
512+
}
513+
514+
/// Create a "concat" AST item.
515+
pub fn concat(e: Concat) -> Ast {
516+
Ast(Box::new(AstKind::Concat(e)))
517+
}
518+
459519
/// Return the span of this abstract syntax tree.
460520
pub fn span(&self) -> &Span {
461-
match *self {
462-
Ast::Empty(ref span) => span,
463-
Ast::Flags(ref x) => &x.span,
464-
Ast::Literal(ref x) => &x.span,
465-
Ast::Dot(ref span) => span,
466-
Ast::Assertion(ref x) => &x.span,
467-
Ast::Class(ref x) => x.span(),
468-
Ast::Repetition(ref x) => &x.span,
469-
Ast::Group(ref x) => &x.span,
470-
Ast::Alternation(ref x) => &x.span,
471-
Ast::Concat(ref x) => &x.span,
521+
match *self.0 {
522+
AstKind::Empty(ref span) => span,
523+
AstKind::Flags(ref x) => &x.span,
524+
AstKind::Literal(ref x) => &x.span,
525+
AstKind::Dot(ref span) => span,
526+
AstKind::Assertion(ref x) => &x.span,
527+
AstKind::Class(ref x) => x.span(),
528+
AstKind::Repetition(ref x) => &x.span,
529+
AstKind::Group(ref x) => &x.span,
530+
AstKind::Alternation(ref x) => &x.span,
531+
AstKind::Concat(ref x) => &x.span,
472532
}
473533
}
474534

475535
/// Return true if and only if this Ast is empty.
476536
pub fn is_empty(&self) -> bool {
477-
match *self {
478-
Ast::Empty(_) => true,
537+
match *self.0 {
538+
AstKind::Empty(_) => true,
479539
_ => false,
480540
}
481541
}
482542

483543
/// Returns true if and only if this AST has any (including possibly empty)
484544
/// subexpressions.
485545
fn has_subexprs(&self) -> bool {
486-
match *self {
487-
Ast::Empty(_)
488-
| Ast::Flags(_)
489-
| Ast::Literal(_)
490-
| Ast::Dot(_)
491-
| Ast::Assertion(_) => false,
492-
Ast::Class(_)
493-
| Ast::Repetition(_)
494-
| Ast::Group(_)
495-
| Ast::Alternation(_)
496-
| Ast::Concat(_) => true,
546+
match *self.0 {
547+
AstKind::Empty(_)
548+
| AstKind::Flags(_)
549+
| AstKind::Literal(_)
550+
| AstKind::Dot(_)
551+
| AstKind::Assertion(_) => false,
552+
AstKind::Class(_)
553+
| AstKind::Repetition(_)
554+
| AstKind::Group(_)
555+
| AstKind::Alternation(_)
556+
| AstKind::Concat(_) => true,
497557
}
498558
}
499559
}
@@ -526,14 +586,14 @@ pub struct Alternation {
526586
impl Alternation {
527587
/// Return this alternation as an AST.
528588
///
529-
/// If this alternation contains zero ASTs, then Ast::Empty is
530-
/// returned. If this alternation contains exactly 1 AST, then the
531-
/// corresponding AST is returned. Otherwise, Ast::Alternation is returned.
589+
/// If this alternation contains zero ASTs, then `Ast::empty` is returned.
590+
/// If this alternation contains exactly 1 AST, then the corresponding AST
591+
/// is returned. Otherwise, `Ast::alternation` is returned.
532592
pub fn into_ast(mut self) -> Ast {
533593
match self.asts.len() {
534-
0 => Ast::Empty(self.span),
594+
0 => Ast::empty(self.span),
535595
1 => self.asts.pop().unwrap(),
536-
_ => Ast::Alternation(self),
596+
_ => Ast::alternation(self),
537597
}
538598
}
539599
}
@@ -551,14 +611,14 @@ pub struct Concat {
551611
impl Concat {
552612
/// Return this concatenation as an AST.
553613
///
554-
/// If this concatenation contains zero ASTs, then Ast::Empty is
555-
/// returned. If this concatenation contains exactly 1 AST, then the
556-
/// corresponding AST is returned. Otherwise, Ast::Concat is returned.
614+
/// If this concatenation contains zero ASTs, then `Ast::empty` is returned.
615+
/// If this concatenation contains exactly 1 AST, then the corresponding AST
616+
/// is returned. Otherwise, `Ast::concat` is returned.
557617
pub fn into_ast(mut self) -> Ast {
558618
match self.asts.len() {
559-
0 => Ast::Empty(self.span),
619+
0 => Ast::empty(self.span),
560620
1 => self.asts.pop().unwrap(),
561-
_ => Ast::Concat(self),
621+
_ => Ast::concat(self),
562622
}
563623
}
564624
}
@@ -1544,43 +1604,43 @@ impl Drop for Ast {
15441604
fn drop(&mut self) {
15451605
use core::mem;
15461606

1547-
match *self {
1548-
Ast::Empty(_)
1549-
| Ast::Flags(_)
1550-
| Ast::Literal(_)
1551-
| Ast::Dot(_)
1552-
| Ast::Assertion(_)
1607+
match *self.0 {
1608+
AstKind::Empty(_)
1609+
| AstKind::Flags(_)
1610+
| AstKind::Literal(_)
1611+
| AstKind::Dot(_)
1612+
| AstKind::Assertion(_)
15531613
// Classes are recursive, so they get their own Drop impl.
1554-
| Ast::Class(_) => return,
1555-
Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
1556-
Ast::Group(ref x) if !x.ast.has_subexprs() => return,
1557-
Ast::Alternation(ref x) if x.asts.is_empty() => return,
1558-
Ast::Concat(ref x) if x.asts.is_empty() => return,
1614+
| AstKind::Class(_) => return,
1615+
AstKind::Repetition(ref x) if !x.ast.has_subexprs() => return,
1616+
AstKind::Group(ref x) if !x.ast.has_subexprs() => return,
1617+
AstKind::Alternation(ref x) if x.asts.is_empty() => return,
1618+
AstKind::Concat(ref x) if x.asts.is_empty() => return,
15591619
_ => {}
15601620
}
15611621

15621622
let empty_span = || Span::splat(Position::new(0, 0, 0));
1563-
let empty_ast = || Ast::Empty(empty_span());
1623+
let empty_ast = || Ast::empty(empty_span());
15641624
let mut stack = vec![mem::replace(self, empty_ast())];
15651625
while let Some(mut ast) = stack.pop() {
1566-
match ast {
1567-
Ast::Empty(_)
1568-
| Ast::Flags(_)
1569-
| Ast::Literal(_)
1570-
| Ast::Dot(_)
1571-
| Ast::Assertion(_)
1626+
match *ast.0 {
1627+
AstKind::Empty(_)
1628+
| AstKind::Flags(_)
1629+
| AstKind::Literal(_)
1630+
| AstKind::Dot(_)
1631+
| AstKind::Assertion(_)
15721632
// Classes are recursive, so they get their own Drop impl.
1573-
| Ast::Class(_) => {}
1574-
Ast::Repetition(ref mut x) => {
1633+
| AstKind::Class(_) => {}
1634+
AstKind::Repetition(ref mut x) => {
15751635
stack.push(mem::replace(&mut x.ast, empty_ast()));
15761636
}
1577-
Ast::Group(ref mut x) => {
1637+
AstKind::Group(ref mut x) => {
15781638
stack.push(mem::replace(&mut x.ast, empty_ast()));
15791639
}
1580-
Ast::Alternation(ref mut x) => {
1640+
AstKind::Alternation(ref mut x) => {
15811641
stack.extend(x.asts.drain(..));
15821642
}
1583-
Ast::Concat(ref mut x) => {
1643+
AstKind::Concat(ref mut x) => {
15841644
stack.extend(x.asts.drain(..));
15851645
}
15861646
}
@@ -1663,9 +1723,9 @@ mod tests {
16631723

16641724
let run = || {
16651725
let span = || Span::splat(Position::new(0, 0, 0));
1666-
let mut ast = Ast::Empty(span());
1726+
let mut ast = Ast::empty(span());
16671727
for i in 0..200 {
1668-
ast = Ast::Group(Group {
1728+
ast = Ast::group(Group {
16691729
span: span(),
16701730
kind: GroupKind::CaptureIndex(i),
16711731
ast: Box::new(ast),

0 commit comments

Comments
 (0)