Skip to content

Commit

Permalink
Avoid catastrophic backtracking in hr regex
Browse files Browse the repository at this point in the history
  • Loading branch information
waylan committed Oct 24, 2020
1 parent 897c854 commit 18b17e1
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 5 deletions.
1 change: 1 addition & 0 deletions docs/change_log/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Under development: version 3.3.3 (a bug-fix release).

* Unify all block-level tags (#1047).
* Fix issue where some empty elements would have text rendered as `None` when using `md_in_html` (#1049).
* Avoid catastrophic backtracking in `hr` regex (#1055).

Oct 19, 2020: version 3.3.2 (a bug-fix release).

Expand Down
9 changes: 4 additions & 5 deletions markdown/blockprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,16 +496,15 @@ def run(self, parent, blocks):
class HRProcessor(BlockProcessor):
""" Process Horizontal Rules. """

# NOTE(review): this view is a flattened diff with no +/- markers. The hunk
# header reports 4 additions and 5 deletions, so the assignment directly below
# appears to be the removed pattern and the later `RE = ...` its replacement;
# confirm against the repository before relying on either.
RE = r'^[ ]{0,3}((-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,})[ ]*$'
# Python's re module doesn't officially support atomic grouping. However, you can fake it.
# See https://stackoverflow.com/a/13577411/866026
# How the emulation works: the lookahead `(?=...)` captures the whole rule
# (three or more runs of `-`, `_` or `*`, each run optionally followed by up to
# two spaces) into the named group `atomicgroup`, and the backreference
# `(?P=atomicgroup)` then consumes exactly that captured text. A lookahead is
# not re-entered once it has matched, so when the trailing `[ ]*$` fails the
# engine cannot go back and try other ways of splitting the rule.
RE = r'^[ ]{0,3}(?=(?P<atomicgroup>(-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,}))(?P=atomicgroup)[ ]*$'
# Detect hr on any line of a block.
# re.MULTILINE makes `^` and `$` match at every line boundary, not only at the
# start and end of the block.
SEARCH_RE = re.compile(RE, re.MULTILINE)

def test(self, parent, block):
m = self.SEARCH_RE.search(block)
# No atomic grouping in python so we simulate it here for performance.
# The regex only matches what would be in the atomic group - the HR.
# Then check if we are at end of block or if next char is a newline.
if m and (m.end() == len(block) or block[m.end()] == '\n'):
if m:
# Save match object on class instance so we can use it later.
self.match = m
return True
Expand Down
23 changes: 23 additions & 0 deletions tests/test_syntax/blocks/test_hr.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,26 @@ def test_not_hr_2_underscores_spaces(self):

'<p>_ _</p>'
)

def test_2_consecutive_hr(self):
# Two rule lines directly after one another, with no blank line between
# them, are expected to render as two separate <hr /> elements.
self.assertMarkdownRenders(
# Markdown input: two `- - -` lines in the same block.
self.dedent(
"""
- - -
- - -
"""
),
# Expected HTML output: one <hr /> per input line.
self.dedent(
"""
<hr />
<hr />
"""
)
)

def test_not_hr_end_in_char(self):
    # A long run of dashes that ends in a non-rule character must not be
    # treated as a horizontal rule; it is expected to render as a paragraph.
    markdown_source = '--------------------------------------c'
    expected_html = '<p>--------------------------------------c</p>'
    self.assertMarkdownRenders(markdown_source, expected_html)

0 comments on commit 18b17e1

Please sign in to comment.