Skip to content

Commit

Permalink
Unescape IDs in TOC.
Browse files Browse the repository at this point in the history
The slugify function will strip the STX and ETX characters from
placeholders for backslash-escaped characters. Therefore, we need
to unescape any text before passing it to slugify. Fixes Python-Markdown#864.
  • Loading branch information
waylan committed Nov 25, 2019
1 parent b77c207 commit 15cbaef
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 1 deletion.
9 changes: 8 additions & 1 deletion markdown/extensions/toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from . import Extension
from ..treeprocessors import Treeprocessor
from ..util import etree, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE
from ..postprocessors import UnescapePostprocessor
import re
import unicodedata

Expand Down Expand Up @@ -56,6 +57,12 @@ def _html_sub(m):
return HTML_PLACEHOLDER_RE.sub(_html_sub, text)


def unescape(text):
    """Return `text` after passing it through a new `UnescapePostprocessor`.

    A fresh postprocessor instance is created on every call and its
    `run` method is applied to `text`; the result is returned unchanged.
    """
    return UnescapePostprocessor().run(text)


def nest_toc_tokens(toc_list):
"""Given an unsorted list with errors and skips, return a nested one.
[{'level': 1}, {'level': 2}]
Expand Down Expand Up @@ -242,7 +249,7 @@ def run(self, doc):

# Do not override pre-existing ids
if "id" not in el.attrib:
innertext = stashedHTML2text(text, self.md)
innertext = unescape(stashedHTML2text(text, self.md))
el.attrib["id"] = unique(self.slugify(innertext, self.sep), used_ids)

toc_tokens.append({
Expand Down
34 changes: 34 additions & 0 deletions tests/test_syntax/extensions/test_toc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""
Python Markdown
A Python implementation of John Gruber's Markdown.
Documentation: https://python-markdown.github.io/
GitHub: https://github.com/Python-Markdown/markdown/
PyPI: https://pypi.org/project/Markdown/
Started by Manfred Stienstra (http://www.dwerg.net/).
Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
Currently maintained by Waylan Limberg (https://github.com/waylan),
Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
Copyright 2007-2019 The Python Markdown Project (v. 1.7 and later)
Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
Copyright 2004 Manfred Stienstra (the original version)
License: BSD (see LICENSE.md for details).
"""

from markdown.test_tools import TestCase


class TestTOC(TestCase):
    """Syntax tests for the `toc` extension."""

    # TODO: Move the rest of the TOC tests here.

    def test_escaped_char_in_id(self):
        # The backslash-escaped underscore should appear as a plain
        # underscore both in the generated id and in the heading text.
        source = r'# escaped\_character'
        expected = '<h1 id="escaped_character">escaped_character</h1>'
        self.assertMarkdownRenders(source, expected, extensions=['toc'])

0 comments on commit 15cbaef

Please sign in to comment.