From 9aa4586d92181a07d3c054d81ada9b100c879d68 Mon Sep 17 00:00:00 2001 From: Carlos Date: Wed, 11 Aug 2021 15:37:44 +0200 Subject: [PATCH] Improve email address validation for Automatic Links --- docs/change_log/index.md | 5 +- markdown/inlinepatterns.py | 4 +- tests/test_syntax/inline/test_autolinks.py | 63 ++++++++++++++++++++++ 3 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 tests/test_syntax/inline/test_autolinks.py diff --git a/docs/change_log/index.md b/docs/change_log/index.md index 11c997b85..fdf213b49 100644 --- a/docs/change_log/index.md +++ b/docs/change_log/index.md @@ -6,9 +6,10 @@ Python-Markdown Change Log Under development: version 3.3.5 (a bug-fix release). * Make the `slugify_unicode` function not remove diacritical marks (#1118). -* Fix `[toc]` detection when used with `nl2br` extension (#1160) -* Re-use compiled regex for block level checks (#1169) +* Fix `[toc]` detection when used with `nl2br` extension (#1160). +* Re-use compiled regex for block level checks (#1169). * Don't process shebangs in fenced code blocks when using CodeHilite (#1156). +* Improve email address validation for Automatic Links (#1165). Feb 24, 2021: version 3.3.4 (a bug-fix release). 
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index b0621a828..f7d604e74 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -160,10 +160,10 @@ def build_inlinepatterns(md, **kwargs): AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>' # -AUTOMAIL_RE = r'<([^<> !]*@[^@<> ]*)>' +AUTOMAIL_RE = r'<([^<> !]+@[^@<> ]+)>' # <...> -HTML_RE = r'(<([a-zA-Z/][^<>]*|!--(?:(?!-->).)*--)>)' +HTML_RE = r'(<(\/?[a-zA-Z][^<>@ ]*( [^<>]*)?|!--(?:(?!-->).)*--)>)' # "&#38;" (decimal) or "&#x26;" (hex) or "&amp;" (named) ENTITY_RE = r'(&(?:\#[0-9]+|\#x[0-9a-fA-F]+|[a-zA-Z0-9]+);)' diff --git a/tests/test_syntax/inline/test_autolinks.py b/tests/test_syntax/inline/test_autolinks.py new file mode 100644 index 000000000..b6bd1cf2d --- /dev/null +++ b/tests/test_syntax/inline/test_autolinks.py @@ -0,0 +1,63 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2021 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). +""" + +from markdown.test_tools import TestCase + + +class TestAutomaticLinks(TestCase): + + def test_email_address(self): + self.assertMarkdownRenders( + 'asdfasdfadsfasd or you can say ', + '

asdfasdfadsfasd yuri@freewisd' + 'om.org or you can say

' + ) + + def test_mailto_email_address(self): + self.assertMarkdownRenders( + 'instead ', + '

instead ' + 'yuri@freewisdom' + '.org

' + ) + + def test_email_address_with_ampersand(self): + self.assertMarkdownRenders( + '', + '

bob&' + 'sue@example.com

' + ) + + def test_invalid_email_address_local_part(self): + self.assertMarkdownRenders( + 'Missing local-part <@domain>', + '

Missing local-part <@domain>

' + ) + + def test_invalid_email_address_domain(self): + self.assertMarkdownRenders( + 'Missing domain ', + '

Missing domain <local-part@>

' + )