Skip to content

Commit

Permalink
Improve detection of exponential REDoS
Browse files Browse the repository at this point in the history
A bit of a major update. There used to be many false positives for
exponential REDoS hidden inside sequences, since regexploit's focus
is polynomial complexity REDoS.

This change does a lot of extra work, which may slow regexploit
down (hopefully not by much), but it gives nice results.
  • Loading branch information
b-c-ds authored and bcaller committed Jan 14, 2021
1 parent 2f7f5f8 commit 169439b
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 6 deletions.
11 changes: 10 additions & 1 deletion regexploit/ast/branch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from typing import Iterator, List, Optional

from regexploit.ast.char import Character
from regexploit.ast.repeat import FiniteRepeat
from regexploit.ast.repeat import FiniteRepeat, InfiniteRepeat
from regexploit.ast.sequence import Sequence


@dataclass(frozen=True)
Expand Down Expand Up @@ -47,6 +48,14 @@ def __repr__(self) -> str:
middle = " | ".join(str(b) for b in self.branches)
return f"BR( {middle} ){'?' if self.optional else ''}"

def matching_repeats(self):
    """Yield the infinite repeats that this branch's alternatives can act as.

    Alternatives whose starriness is not positive are skipped. An alternative
    that is itself an ``InfiniteRepeat`` is yielded unchanged; a ``Sequence``
    alternative contributes whatever its own ``matching_repeats()`` yields.
    Starry alternatives of any other type are ignored.
    """
    for alternative in self.branches:
        if alternative.starriness <= 0:
            continue
        if isinstance(alternative, InfiniteRepeat):
            yield alternative
        elif isinstance(alternative, Sequence):
            yield from alternative.matching_repeats()


def make_branch(branches: List):
if len(branches) == 1:
Expand Down
9 changes: 9 additions & 0 deletions regexploit/ast/char.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,15 @@ def minimum_length(self) -> int:
def starriness(self) -> int:
return 0

def __hash__(self) -> int:
    """Hash on polarity plus the sorted literals and sorted categories.

    Literals and categories are sorted into tuples so the hash does not
    depend on iteration order. A falsy collection (empty or ``None``)
    contributes ``None`` to the hashed tuple.
    """
    literal_key = tuple(sorted(self.literals)) if self.literals else None
    category_key = tuple(sorted(self.categories)) if self.categories else None
    return hash((self.positive, literal_key, category_key))

def exact_character_class(self) -> "Character":
    """Return this object itself as its exact character class."""
    return self

Expand Down
4 changes: 1 addition & 3 deletions regexploit/ast/groupref.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@ def subpattern_to_groupref(subpattern):
if subpattern.starriness == 0:
return subpattern
if isinstance(subpattern, repeat.FiniteRepeat):
return repeat.FiniteRepeat(
return subpattern.alter_repeat(
subpattern_to_groupref(subpattern.repeat),
subpattern.minimum_repeats,
subpattern.maximum_repeats,
)
if isinstance(subpattern, repeat.InfiniteRepeat):
return repeat.FiniteRepeat(
Expand Down
10 changes: 10 additions & 0 deletions regexploit/ast/repeat.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,18 +44,28 @@ def maximal_character_class(self) -> Character:

@dataclass(frozen=True)
class InfiniteRepeat(Repeat):
    """A repeat without a maximum count; its repr is ``<repeat>{<minimum>+}``."""

    # When not None, this value is returned by ``starriness`` as-is.
    forced_starriness: Optional[int] = None

    @property
    def starriness(self) -> int:
        """Return the forced starriness if set, otherwise a computed score."""
        override = self.forced_starriness
        if override is None:
            # Nested stars are weighted by 10: a*a*a* is only cubic, whereas
            # (a*)* is exponential, and the factor of 10 stands in for that.
            return self.repeat.starriness * 10 + 1
        return override

    def __repr__(self) -> str:
        lower_bound = f"{{{self.minimum_repeats}+}}"
        return f"{self.repeat}{lower_bound}"

    def alter_repeat(self, repeat) -> "InfiniteRepeat":
        """Return a new InfiniteRepeat of ``repeat`` with the same minimum.

        ``forced_starriness`` is not carried over to the new object.
        """
        minimum = self.minimum_repeats
        return InfiniteRepeat(repeat, minimum)


@dataclass(frozen=True)
class FiniteRepeat(Repeat):
    """A repeat with an explicit maximum count.

    Its repr is ``<repeat>{<minimum>,<maximum>}``.
    """

    maximum_repeats: int

    def __repr__(self) -> str:
        bounds = f"{self.minimum_repeats},{self.maximum_repeats}"
        return f"{self.repeat}{{{bounds}}}"

    def alter_repeat(self, repeat) -> "FiniteRepeat":
        """Return a new FiniteRepeat of ``repeat`` with the same bounds."""
        minimum, maximum = self.minimum_repeats, self.maximum_repeats
        return FiniteRepeat(repeat, minimum, maximum)
54 changes: 53 additions & 1 deletion regexploit/ast/sequence.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from dataclasses import dataclass
from typing import List, Optional
from typing import List, Optional, Set

from regexploit.ast.char import Character
from regexploit.ast.repeat import InfiniteRepeat


@dataclass(frozen=True)
Expand Down Expand Up @@ -40,6 +41,7 @@ def exact_character_class(self) -> Optional[Character]:
def overall_character_class(self) -> Optional[Character]:
"""
aa*a -> a, abc -> None, [ab][abc] -> [ab]
a?b -> b, a+b -> None, \w+b*
"""
c = Character.ANY()
for e in self.elements:
Expand All @@ -48,6 +50,56 @@ def overall_character_class(self) -> Optional[Character]:
return None
return c

def matching_repeats(self):
    """Yield pseudo ``InfiniteRepeat`` objects describing this sequence.

    Each yielded repeat wraps one candidate character class, uses the summed
    minimum length of the mandatory elements as its minimum repeat count, and
    carries a ``forced_starriness`` equal to the starriness accumulated for
    that class. Classes whose accumulated starriness is zero are not yielded.
    Nothing is yielded when the mandatory elements share no character class.
    """
    common = Character.ANY()
    has_mandatory = False
    optionals = []
    starriness = 0
    minimum_length = 0
    for element in self.elements:
        if element.minimum_length:
            # Every mandatory element narrows the shared character class.
            common &= element.overall_character_class()
            if not common:
                return
            has_mandatory = True
            starriness += element.starriness
            minimum_length += element.minimum_length
        elif element.starriness > 0:
            optionals.append(element)

    # Maps character class -> accumulated starriness. Insertion order is kept
    # because it determines the order in which repeats are yielded below.
    possibilities = {common: starriness} if has_mandatory else {}
    for element in optionals:
        # NOTE(review): if overall_character_class() can return None here, this
        # relies on Character supporting ``None & Character`` -- confirm.
        if overlap := element.overall_character_class() & common:
            possibilities[overlap] = (
                possibilities.get(overlap, 0) + element.starriness
            )

    if len(possibilities) > 1:
        # (a*[ab]*a*[bc]*[bcd]*.+a*)*@ has classes {.: 1, [a]: 5, [[a-b]]: 2, [[b-c]]: 3, [[b-d]]: 2, [b]: 3}
        # This could blow up!
        pending = list(possibilities.items())
        merged_chars = {}
        while pending:
            c_a, s_a = pending.pop()
            for c_b, s_b in pending:
                merged = c_a & c_b
                if merged is None:
                    continue
                if merged == c_a:
                    # c_a lies within c_b, so c_b's stars also apply to c_a.
                    possibilities[c_a] += s_b
                elif merged == c_b:
                    possibilities[c_b] += s_a
                else:
                    # Partial overlap: remember which classes contribute to
                    # the intersection (a set, so each pair counts once).
                    merged_chars.setdefault(merged, set()).update(
                        ((c_a, s_a), (c_b, s_b))
                    )
        for merged, contributors in merged_chars.items():
            # NOTE(review): if ``merged`` is already a key, its accumulated
            # count is overwritten here -- confirm this is intended.
            possibilities[merged] = sum(s for _, s in contributors)

    for char_class, score in possibilities.items():
        if score:
            yield InfiniteRepeat(char_class, minimum_length, forced_starriness=score)

def maximal_character_class(self) -> Character:
"""
Only useful when this Sequence is inside a Repeat
Expand Down
16 changes: 15 additions & 1 deletion regexploit/redos.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from regexploit.ast.at import EndOfString
from regexploit.ast.branch import Branch
from regexploit.ast.char import Character
from regexploit.ast.repeat import InfiniteRepeat
from regexploit.ast.repeat import InfiniteRepeat, Repeat
from regexploit.ast.sequence import Sequence


Expand Down Expand Up @@ -78,12 +78,25 @@ def expand_branches(seq: Sequence) -> Iterator[Sequence]:
for tail in expand_branches(Sequence(seq.elements[i + 1 :])):
yield Sequence(head_plus_branch + tail.elements)
return # All processing in yields
elif isinstance(elem, Repeat) and elem.starriness > 10:
logging.debug("Exponential: %s", elem)
if isinstance(elem.repeat, (Sequence, Branch)):
for tail in expand_branches(Sequence(seq.elements[i + 1 :])):
yield Sequence(head + [elem] + tail.elements)
for pseudo_repeat in elem.repeat.matching_repeats():
logging.debug("Pseudo repeat %s", pseudo_repeat)
yield Sequence(
head + [elem.alter_repeat(pseudo_repeat)] + tail.elements
)
else:
head.append(elem)
else:
head.append(elem)
yield Sequence(head)


def find_redos(sequence_with_branches) -> Iterator[Redos]:
logging.debug(sequence_with_branches)
if not isinstance(
sequence_with_branches, Sequence
): # singleton like Branch (ab|cd)
Expand All @@ -93,6 +106,7 @@ def find_redos(sequence_with_branches) -> Iterator[Redos]:


def find_redos_in_branchless_sequence(seq: Sequence) -> Iterator[Redos]:
logging.debug(seq)
for i, elem in enumerate(seq.elements):
# TODO branches
if isinstance(elem, InfiniteRepeat) and (c := elem.overall_character_class()):
Expand Down
24 changes: 24 additions & 0 deletions tests/test_redos.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,27 @@ def test_negative_lookahead():
r = rs[0]
assert r.starriness == 4
assert r.repeated_character == from_regex(r"a")


@pytest.mark.parametrize(
    "r",
    [
        r"(a?b+)+c",
        r"(x*[ab]*x?[bc]*x?)*c",
        r"(x?[ab]+x?[bc]+\w*x?)*c",
    ],
)
def test_regexlib_sequence_exponential(r):
    """The first finding for each pattern is exponential, on ``b``, killer-free."""
    findings = find_redos(r)
    first = findings[0]
    assert first.starriness > 10
    assert first.repeated_character == from_regex(r"b")
    assert first.killer is None


def test_dt_branch_exponential():
    """A starry sequence inside a repeated branch is reported with starriness 11."""
    findings = find_redos(r"a(z|\w*b)*d")
    first = findings[0]
    assert first.starriness == 11
    assert first.repeated_character == from_regex(r"b")
    assert first.killer is None
11 changes: 11 additions & 0 deletions tests/test_repeat.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ def test_character_class():
assert r.exact_character_class() == from_regex(r"a")


def test_subsequence_character_class():
    """``(a?b+)*`` has no overall class, but its inner sequence repeats on ``b``."""
    outer = from_regex(r"(a?b+)*")
    assert outer.starriness == 11
    assert outer.minimum_length == 0
    assert outer.exact_character_class() is None
    assert outer.overall_character_class() is None
    pseudo_repeats = list(outer.repeat.matching_repeats())
    assert len(pseudo_repeats) == 1
    only_repeat = pseudo_repeats[0]
    assert only_repeat.overall_character_class() == from_regex(r"b")


def test_negative_lookahead_infinite():
r = SreOpParser().parse_sre(r"(?!b)[a-d]+")
assert r == SreOpParser().parse_sre(r"[acd][a-d]*")
Expand Down

0 comments on commit 169439b

Please sign in to comment.