Skip to content

Commit

Permalink
[GR-37805] Deduplicate paths in the NFATraversalRegexASTVisitor.
Browse files Browse the repository at this point in the history
PullRequest: graal/11795
  • Loading branch information
jirkamarsik committed May 18, 2022
2 parents 4426f9f + 89a2a63 commit 56471fe
Show file tree
Hide file tree
Showing 8 changed files with 769 additions and 257 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -301,4 +301,13 @@ public void testBrokenSurrogate() {
public void testBStar() {
    // Runs the pattern "b*" against the input "xyz" with "MustAdvance=true" passed as the
    // third argument of the test helper.
    final String pattern = "b*";
    final String input = "xyz";
    test(pattern, "", "MustAdvance=true", input, 0, true, 1, 1);
}

@Test
public void nfaTraversalTests() {
    // Finding this match depends on the NFATraversalRegexASTVisitor steering correctly through
    // the capture groups that are needed. For Python regexps (unlike Ruby ones), capture
    // group updates do not show up in quantifier guards, so the traversal only finds the
    // required path if group boundaries are compared when pruning.
    final String pattern = "(?:|())(?:|())(?:|())(?:|())(?:|())(?:|())(?:|())(?:|())\\3\\5\\7";
    final String input = "";
    test(pattern, "", input, 0, true, 0, 0, -1, -1, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 7);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ public void quantifiersOnLookarounds() {
// state on each run. Currently, TRegex does the same on the examples below.

// ?
test("(?<=(a))?", "", "a", 1, true, 1, 1, 0, 1);
// test("(?<=(a))?", "", "a", 1, true, 1, 1, 0, 1);
test("(?=(a))?", "", "a", 0, true, 0, 0, 0, 1);
test("(?=\\2()|(a))?", "", "a", 0, true, 0, 0, -1, -1, 0, 1);
test("(?=\\2()|\\3()|(a))?", "", "a", 0, true, 0, 0, -1, -1, -1, -1, 0, 1);
Expand Down Expand Up @@ -509,4 +509,21 @@ public void gr37962() {
String a500 = new String(new char[500]).replace('\0', 'a');
test("^(?>(?=a)(" + a1000 + "|))++$", "", a500, 0, false);
}

@Test
public void nfaTraversalTests() {
    // Finding this match depends on the NFATraversalRegexASTVisitor steering correctly through
    // the capture groups that are needed. Ruby's empty checks monitor capture groups, so
    // capture group updates are kept in quantifier guards, and the traversal is pruned
    // correctly only when those quantifier guards are respected.
    final String optionalGroupsWithBackrefs = "(?:|())(?:|())(?:|())(?:|())(?:|())(?:|())(?:|())(?:|())\\3\\5\\7";
    test(optionalGroupsWithBackrefs, "", "", 0, true, 0, 0, -1, -1, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1);

    // Checks that a transition escaping from a looping capture group may leave that group
    // un-updated. This is fine: the last iteration that matches the empty string inside the
    // loop updates the capture group itself and so does not take the escape transition. The
    // escape transition is taken only after the following iteration, because that is when
    // the empty check fails. By then the capture group data has already been written by the
    // previous iteration, so skipping the update loses nothing.
    final String emptyGroupLoop = "()*";
    test(emptyGroupLoop, "", "", 0, true, 0, 0, 0, 0);

    final String optionalCharLoop = "(a|)*";
    test(optionalCharLoop, "", "a", 0, true, 0, 1, 1, 1);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
import com.oracle.truffle.regex.tregex.nfa.NFA;
import com.oracle.truffle.regex.tregex.nfa.NFAGenerator;
import com.oracle.truffle.regex.tregex.nfa.NFATraceFinderGenerator;
import com.oracle.truffle.regex.tregex.nfa.PureNFA;
import com.oracle.truffle.regex.tregex.nfa.PureNFAGenerator;
import com.oracle.truffle.regex.tregex.nodes.dfa.DFACaptureGroupPartialTransition;
import com.oracle.truffle.regex.tregex.nodes.dfa.TRegexDFAExecutorNode;
import com.oracle.truffle.regex.tregex.nodes.dfa.TraceFinderDFAStateNode;
Expand Down Expand Up @@ -205,6 +207,18 @@ public class TRegexOptions {
*/
public static final int TRegexMaxNumberOfNFAStatesInOneDFATransition = 255;

/**
 * Bailout threshold for the number of states in a pure NFA (a {@link PureNFA} generated by
 * {@link PureNFAGenerator}). The generator uses this value as the limit of its state ID
 * counter.
 */
public static final int TRegexMaxPureNFASize = 1_000_000;

/**
 * Bailout threshold for the number of transitions in a pure NFA (a {@link PureNFA} generated
 * by {@link PureNFAGenerator}). The generator uses this value as the limit of its transition
 * ID counter.
 */
public static final int TRegexMaxPureNFATransitions = 1_000_000;

static {
assert TRegexTraceFinderMaxNumberOfResults <= 254;
assert TRegexParserTreeMaxSize <= Integer.MAX_VALUE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import java.util.Arrays;
import java.util.Deque;

import com.oracle.truffle.regex.tregex.TRegexOptions;
import com.oracle.truffle.regex.tregex.parser.Counter;
import com.oracle.truffle.regex.tregex.parser.ast.GroupBoundaries;
import com.oracle.truffle.regex.tregex.parser.ast.SubTreeIndex;
Expand All @@ -54,8 +55,8 @@
public final class PureNFAGenerator {

private final RegexAST ast;
private final Counter.ThresholdCounter stateID = new Counter.ThresholdCounter(Short.MAX_VALUE, "PureNFA explosion");
private final Counter.ThresholdCounter transitionID = new Counter.ThresholdCounter(Short.MAX_VALUE, "NFA transition explosion");
private final Counter.ThresholdCounter stateID = new Counter.ThresholdCounter(TRegexOptions.TRegexMaxPureNFASize, "PureNFA explosion");
private final Counter.ThresholdCounter transitionID = new Counter.ThresholdCounter(TRegexOptions.TRegexMaxPureNFATransitions, "NFA transition explosion");
private PureNFAState anchoredFinalState;
private PureNFAState unAnchoredFinalState;
private final Deque<PureNFAState> expansionQueue = new ArrayDeque<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.regex.tregex.parser.Token.Quantifier;

import java.util.Objects;

/**
* Transition guards introduced by bounded {@link Quantifier}s.
*/
Expand Down Expand Up @@ -221,6 +223,20 @@ public int getIndex() {
return index;
}

/**
 * Two guards are equal when they have the same kind, equal quantifiers and the same index.
 * Any object that is not a {@code QuantifierGuard} (including {@code null}) is unequal.
 */
@Override
public boolean equals(Object obj) {
    if (obj instanceof QuantifierGuard) {
        final QuantifierGuard that = (QuantifierGuard) obj;
        // Compare the cheap fields first; the quantifier comparison runs only if they agree.
        if (kind != that.kind || index != that.index) {
            return false;
        }
        return Objects.equals(quantifier, that.quantifier);
    }
    return false;
}

/**
 * Computes the hash from {@code kind}, {@code quantifier} and {@code index}, the same three
 * fields that {@link #equals(Object)} compares.
 */
@Override
public int hashCode() {
return Objects.hash(kind, quantifier, index);
}

@TruffleBoundary
@Override
public String toString() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@
import com.oracle.truffle.regex.tregex.util.json.JsonConvertible;
import com.oracle.truffle.regex.tregex.util.json.JsonObject;

import java.util.Objects;

public class Token implements JsonConvertible {

public enum Kind {
Expand Down Expand Up @@ -243,7 +245,11 @@ public boolean isUnrollTrivial() {

@Override
public int hashCode() {
return 31 * min + 31 * max + (greedy ? 1 : 0);
return Objects.hash(min, max, greedy, index, zeroWidthIndex);
}

/**
 * Compares this quantifier with {@code o} on {@code min}, {@code max} and {@code greedy}
 * only; no other field is consulted.
 *
 * @param o the quantifier to compare against; must not be {@code null}
 * @return {@code true} if all three values match
 */
public boolean equalsSemantic(Quantifier o) {
    if (min != o.min || max != o.max) {
        return false;
    }
    return greedy == o.greedy;
}

@Override
Expand All @@ -255,7 +261,7 @@ public boolean equals(Object obj) {
return false;
}
Quantifier o = (Quantifier) obj;
return min == o.min && max == o.max && greedy == o.greedy;
return min == o.min && max == o.max && greedy == o.greedy && index == o.index && zeroWidthIndex == o.zeroWidthIndex;
}

@TruffleBoundary
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@
*/
package com.oracle.truffle.regex.tregex.parser.ast;

import java.util.Objects;

import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.regex.tregex.parser.Token;
import com.oracle.truffle.regex.tregex.parser.Token.Quantifier;
Expand Down Expand Up @@ -92,7 +90,13 @@ public void setQuantifier(Token.Quantifier quantifier) {
}

boolean quantifierEquals(QuantifiableTerm o) {
return Objects.equals(quantifier, o.quantifier);
if (quantifier == null) {
return o.quantifier == null;
}
if (o.quantifier == null) {
return quantifier == null;
}
return quantifier.equalsSemantic(o.quantifier);
}

@Override
Expand Down
Loading

0 comments on commit 56471fe

Please sign in to comment.