Skip to content

Commit

Permalink
Optimize several regexes from quadratic time to linear time
Browse files (browse the repository at this point in the history)
Part of the discussion in Python-Markdown#798.

Signed-off-by: Anders Kaseorg <[email protected]>
  • Loading branch information
andersk authored and waylan committed Mar 7, 2019
1 parent 4b11593 commit cb47805
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
10 changes: 5 additions & 5 deletions markdown/inlinepatterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,10 @@ def build_inlinepatterns(md, **kwargs):
NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))'

# <http://www.123.com>
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>'
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>'

# <me@example.com>
AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'
AUTOMAIL_RE = r'<([^<> !]*@[^@<> ]*)>'

# <...>
HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)'
Expand Down Expand Up @@ -433,7 +433,7 @@ def get_stash(m):

class LinkInlineProcessor(InlineProcessor):
""" Return a link element from the given match. """
RE_LINK = re.compile(r'''\(\s*(?:(<.*?>)\s*(?:(['"])(.*?)\2\s*)?\))?''', re.DOTALL | re.UNICODE)
RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE)
RE_TITLE_CLEAN = re.compile(r'\s')

def handleMatch(self, m, data):
Expand Down Expand Up @@ -467,8 +467,8 @@ def getLink(self, data, index):
if m and m.group(1):
# Matches [Text](<link> "title")
href = m.group(1)[1:-1].strip()
if m.group(3):
title = m.group(3)
if m.group(2):
title = m.group(2)[1:-1]
index = m.end(0)
handled = True
elif m:
Expand Down
2 changes: 1 addition & 1 deletion tests/misc/html.html
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ <h1>Block level html</h1>
</div>

<p>And of course <script>blah</script>.</p>
<p><a href="script&gt;stuff&lt;/script">this <script>link</a></p>
<p><a href="&lt;script&gt;stuff&lt;/script&gt;">this <script>link</a></p>
<p>Some funky <x\]> inline stuff with markdown escaping syntax.</p>
<p><img scr="foo.png" title="Only one inline element on a line." /></p>
<p>And now a line with only an opening bracket:</p>
Expand Down

0 comments on commit cb47805

Please sign in to comment.