Skip to content

Commit

Permalink
Explicitly omit caret and backslash from abbr
Browse files Browse the repository at this point in the history
  • Loading branch information
waylan authored Mar 6, 2024
1 parent 421f1e8 commit a18765c
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 38 deletions.
1 change: 1 addition & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Include `scripts/*.py` in the generated source tarballs (#1430).
* Ensure lines after heading in loose list are properly detabbed (#1443).
* Give smarty tree processor higher priority than toc (#1440).
* Explicitly omit caret (`^`) and backslash (`\`) from abbreviations (#1444).

## [3.5.2] -- 2024-01-10

Expand Down
8 changes: 8 additions & 0 deletions docs/extensions/abbreviations.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ will be rendered as:
is maintained by the <abbr title="World Wide Web Consortium">W3C</abbr>.</p>
```

The following three characters are not permitted in an abbreviation. Any
abbreviation definitions which include one will not be recognized as an
abbreviation definition.

1. caret (`^`)
2. backslash (`\`)
3. right square bracket (`]`)

Usage
-----

Expand Down
15 changes: 6 additions & 9 deletions markdown/extensions/abbr.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def extendMarkdown(self, md):
class AbbrPreprocessor(BlockProcessor):
""" Abbreviation Preprocessor - parse text for abbr references. """

RE = re.compile(r'^[*]\[(?P<abbr>[^\]]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
RE = re.compile(r'^[*]\[(?P<abbr>[^\]\^\\]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)

def test(self, parent: etree.Element, block: str) -> bool:
return True
Expand Down Expand Up @@ -73,18 +73,15 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool:

def _generate_pattern(self, text: str) -> str:
"""
Given a string, returns an regex pattern to match that string.
Given a string, returns a regex pattern to match that string.
'HTML' -> r'(?P<abbr>[H][T][M][L])'
'HTML' -> r'(?P<abbr>\b[H][T][M][L]\b)'
Note: we force each char as a literal match (in brackets) as we don't
know what they will be beforehand.
Note: we force each char as a literal match via a character set (in brackets)
as we don't know what they will be beforehand.
"""
chars = list(text)
for i in range(len(chars)):
chars[i] = r'[%s]' % chars[i]
return r'(?P<abbr>\b%s\b)' % (r''.join(chars))
return f"(?P<abbr>\\b{ ''.join(f'[{ c }]' for c in text) }\\b)"


class AbbrInlineProcessor(InlineProcessor):
Expand Down
29 changes: 0 additions & 29 deletions tests/test_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,35 +85,6 @@ def testConfigAsKwargsOnInit(self):
self.assertEqual(ext.getConfigs(), {'foo': 'baz', 'bar': 'blah'})


class TestAbbr(unittest.TestCase):
""" Test abbr extension. """

def setUp(self):
# Each test gets a Markdown instance with only the `abbr` extension enabled.
self.md = markdown.Markdown(extensions=['abbr'])

def testSimpleAbbr(self):
""" Test Abbreviations. """
# Two definitions (ABBR, REF) follow the paragraph. The expected output
# wraps only the exact tokens "ABBR" and "REF"; "REFERENCE" and the
# lowercase "ref" are left as plain text.
text = 'Some text with an ABBR and a REF. Ignore REFERENCE and ref.' + \
'\n\n*[ABBR]: Abbreviation\n' + \
'*[REF]: Abbreviation Reference'
self.assertEqual(
self.md.convert(text),
'<p>Some text with an <abbr title="Abbreviation">ABBR</abbr> '
'and a <abbr title="Abbreviation Reference">REF</abbr>. Ignore '
'REFERENCE and ref.</p>'
)

def testNestedAbbr(self):
""" Test Nested Abbreviations. """
# The abbreviation appears as link text and inside emphasis; the expected
# output has the <abbr> element nested inside both <a> and <em>.
text = '[ABBR](/foo) and _ABBR_\n\n' + \
'*[ABBR]: Abbreviation'
self.assertEqual(
self.md.convert(text),
'<p><a href="/foo"><abbr title="Abbreviation">ABBR</abbr></a> '
'and <em><abbr title="Abbreviation">ABBR</abbr></em></p>'
)


class TestMetaData(unittest.TestCase):
""" Test `MetaData` extension. """

Expand Down
61 changes: 61 additions & 0 deletions tests/test_syntax/extensions/test_abbr.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,25 @@ def test_abbr_override(self):
)
)

def test_abbr_nested(self):
# An abbreviation used as link text and inside emphasis is expected to be
# wrapped in <abbr> nested within the <a> and <em> elements respectively.
self.assertMarkdownRenders(
self.dedent(
"""
[ABBR](/foo)
_ABBR_
*[ABBR]: Abbreviation
"""
),
self.dedent(
"""
<p><a href="/foo"><abbr title="Abbreviation">ABBR</abbr></a></p>
<p><em><abbr title="Abbreviation">ABBR</abbr></em></p>
"""
)
)

def test_abbr_no_blank_Lines(self):
self.assertMarkdownRenders(
self.dedent(
Expand Down Expand Up @@ -240,3 +259,45 @@ def test_abbr_single_quoted(self):
"""
)
)

def test_abbr_ignore_special_chars(self):
# Definitions whose abbreviation contains a caret, a backslash or a closing
# square bracket must not be recognized as abbreviation definitions: the
# expected output keeps every `*[...]: ...` line as an ordinary paragraph and
# contains no <abbr> elements.
# Both strings are raw (r""") so the backslashes reach Markdown unchanged.
self.assertMarkdownRenders(
self.dedent(
r"""
[^] [\\] [\]] []]
*[^]: Not an abbreviation
*[\\]: Not an abbreviation
*[\]]: Not an abbreviation
*[]]: Not an abbreviation
"""
),
self.dedent(
r"""
<p>[^] [\] []] []]</p>
<p>*[^]: Not an abbreviation</p>
<p>*[\]: Not an abbreviation</p>
<p>*[]]: Not an abbreviation</p>
<p>*[]]: Not an abbreviation</p>
"""
)
)

def test_abbr_hyphen(self):
# An abbreviation containing a hyphen (`ABBR-abbr`) is expected to be matched
# as a whole and wrapped in a single <abbr> element.
self.assertMarkdownRenders(
self.dedent(
"""
ABBR-abbr
*[ABBR-abbr]: Abbreviation
"""
),
self.dedent(
"""
<p><abbr title="Abbreviation">ABBR-abbr</abbr></p>
"""
)
)

0 comments on commit a18765c

Please sign in to comment.