Unescape IDs in TOC.

The slugify function will strip the STX and ETX characters from placeholders for backslash-escaped characters. Therefore, we need to unescape any text before passing it to slugify. Fixes Python-Markdown#864.
xiaohuacheung · Nov 25, 2019 · 15cbaef · 15cbaef
1 parent b77c207
commit 15cbaef
Show file tree

Hide file tree

Showing 2 changed files with 42 additions and 1 deletion.
diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py
@@ -16,6 +16,7 @@
 from . import Extension
 from ..treeprocessors import Treeprocessor
 from ..util import etree, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE
+from ..postprocessors import UnescapePostprocessor
 import re
 import unicodedata
 
@@ -56,6 +57,12 @@ def _html_sub(m):
     return HTML_PLACEHOLDER_RE.sub(_html_sub, text)
 
 
+def unescape(text):
+    """ Run `text` through a new `UnescapePostprocessor` and return the result. """
+    c = UnescapePostprocessor()
+    return c.run(text)
+
+
 def nest_toc_tokens(toc_list):
     """Given an unsorted list with errors and skips, return a nested one.
     [{'level': 1}, {'level': 2}]
@@ -242,7 +249,7 @@ def run(self, doc):
 
                 # Do not override pre-existing ids
                 if "id" not in el.attrib:
-                    innertext = stashedHTML2text(text, self.md)
+                    innertext = unescape(stashedHTML2text(text, self.md))
                     el.attrib["id"] = unique(self.slugify(innertext, self.sep), used_ids)
 
                 toc_tokens.append({

diff --git a/tests/test_syntax/extensions/test_toc.py b/tests/test_syntax/extensions/test_toc.py
@@ -0,0 +1,34 @@
+"""
+Python Markdown
+
+A Python implementation of John Gruber's Markdown.
+
+Documentation: https://python-markdown.github.io/
+GitHub: https://github.com/Python-Markdown/markdown/
+PyPI: https://pypi.org/project/Markdown/
+
+Started by Manfred Stienstra (http://www.dwerg.net/).
+Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+Currently maintained by Waylan Limberg (https://github.com/waylan),
+Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+
+Copyright 2007-2019 The Python Markdown Project (v. 1.7 and later)
+Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+Copyright 2004 Manfred Stienstra (the original version)
+
+License: BSD (see LICENSE.md for details).
+"""
+
+from markdown.test_tools import TestCase
+
+
+class TestTOC(TestCase):
+
+    # TODO: Move the rest of the TOC tests here.
+
+    def test_escaped_char_in_id(self):  # a backslash-escaped `_` must appear literally in the generated id
+        self.assertMarkdownRenders(
+            r'# escaped\_character',
+            '<h1 id="escaped_character">escaped_character</h1>',
+            extensions=['toc']
+        )