Skip to content

Commit

Permalink
Don't emit stashed HTML tag placeholders in .toc_tokens (Python-Markdown#901)
Browse files Browse the repository at this point in the history

Note: this slightly changes existing behavior in that raw HTML tags are no
longer included in the HTML `.toc`. However, the fact that this worked before
was an oversight; the intention was always to strip all markup. Resolves Python-Markdown#899.
  • Loading branch information
jimporter authored Jan 31, 2020
1 parent 6651746 commit ccf56ed
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 28 deletions.
13 changes: 9 additions & 4 deletions markdown/extensions/toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,19 @@ def unique(id, ids):
return id


def stashedHTML2text(text, md, strip_entities=True):
    """ Extract raw HTML from stash, reduce to plain text and swap with placeholder.

    Every match of `HTML_PLACEHOLDER_RE` in `text` is replaced by the raw HTML
    stored at the matching index of `md.htmlStash.rawHtmlBlocks`, with the
    markup removed.

    Arguments:

    * text: String that may contain stashed-HTML placeholders.
    * md: Object exposing `htmlStash.rawHtmlBlocks`, indexable by the integer
      captured from the placeholder.
    * strip_entities: When true (the default), HTML entities such as `&amp;`
      are removed along with the tags. Pass `False` to keep the entities in
      the result, e.g. when the caller unescapes them itself.

    Returns `text` with each resolvable placeholder replaced by plain text.
    """
    def _html_sub(m):
        """ Substitute raw html with plain text. """
        try:
            raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
        except (IndexError, TypeError):  # pragma: no cover
            # Nothing usable is stashed at that index; leave the placeholder as is.
            return m.group(0)
        # Strip out tags and/or entities - leaving text
        res = re.sub(r'(<[^>]+>)', '', raw)
        if strip_entities:
            res = re.sub(r'(&[\#a-zA-Z0-9]+;)', '', res)
        return res

    return HTML_PLACEHOLDER_RE.sub(_html_sub, text)

Expand Down Expand Up @@ -259,7 +262,9 @@ def run(self, doc):
toc_tokens.append({
'level': int(el.tag[-1]),
'id': el.attrib["id"],
'name': el.attrib.get('data-toc-label', text)
'name': unescape(stashedHTML2text(
el.attrib.get('data-toc-label', text), self.md, strip_entities=False
))
})

# Remove the data-toc-label attribute as it is no longer needed
Expand Down
102 changes: 78 additions & 24 deletions tests/test_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,7 @@ def testReset(self):
self.assertStartsWith('<div class="toc">', self.md.toc)
self.md.reset()
self.assertEqual(self.md.toc, '')
self.assertEqual(self.md.toc_tokens, [])

def testUniqueIds(self):
""" Test Unique IDs. """
Expand All @@ -791,6 +792,21 @@ def testUniqueIds(self):
'<h1 id="header_1">Header</h1>\n'
'<h1 id="header_2">Header</h1>'
)
self.assertEqual(
self.md.toc,
'<div class="toc">\n'
'<ul>\n' # noqa
'<li><a href="#header">Header</a></li>\n' # noqa
'<li><a href="#header_1">Header</a></li>\n' # noqa
'<li><a href="#header_2">Header</a></li>\n' # noqa
'</ul>\n' # noqa
'</div>\n'
)
self.assertEqual(self.md.toc_tokens, [
{'level': 1, 'id': 'header', 'name': 'Header', 'children': []},
{'level': 1, 'id': 'header_1', 'name': 'Header', 'children': []},
{'level': 1, 'id': 'header_2', 'name': 'Header', 'children': []},
])

def testHtmlEntities(self):
""" Test Headers with HTML Entities. """
Expand All @@ -799,6 +815,17 @@ def testHtmlEntities(self):
self.md.convert(text),
'<h1 id="foo-bar">Foo &amp; bar</h1>'
)
self.assertEqual(
self.md.toc,
'<div class="toc">\n'
'<ul>\n' # noqa
'<li><a href="#foo-bar">Foo &amp; bar</a></li>\n' # noqa
'</ul>\n' # noqa
'</div>\n'
)
self.assertEqual(self.md.toc_tokens, [
{'level': 1, 'id': 'foo-bar', 'name': 'Foo &amp; bar', 'children': []},
])

def testRawHtml(self):
""" Test Headers with raw HTML. """
Expand All @@ -807,6 +834,17 @@ def testRawHtml(self):
self.md.convert(text),
'<h1 id="foo-bar-baz">Foo <b>Bar</b> Baz.</h1>'
)
self.assertEqual(
self.md.toc,
'<div class="toc">\n'
'<ul>\n' # noqa
'<li><a href="#foo-bar-baz">Foo Bar Baz.</a></li>\n' # noqa
'</ul>\n' # noqa
'</div>\n'
)
self.assertEqual(self.md.toc_tokens, [
{'level': 1, 'id': 'foo-bar-baz', 'name': 'Foo Bar Baz.', 'children': []},
])

def testBaseLevel(self):
""" Test Header Base Level. """
Expand All @@ -833,6 +871,12 @@ def testBaseLevel(self):
'</ul>\n' # noqa
'</div>\n'
)
self.assertEqual(md.toc_tokens, [
{'level': 5, 'id': 'some-header', 'name': 'Some Header', 'children': [
{'level': 6, 'id': 'next-level', 'name': 'Next Level', 'children': []},
{'level': 6, 'id': 'too-high', 'name': 'Too High', 'children': []},
]},
])

def testHeaderInlineMarkup(self):
""" Test Headers with inline markup. """
Expand All @@ -843,6 +887,18 @@ def testHeaderInlineMarkup(self):
'<h1 id="some-header-with-markup">Some <em>Header</em> with '
'<a href="http://example.com">markup</a>.</h1>'
)
self.assertEqual(
self.md.toc,
'<div class="toc">\n'
'<ul>\n' # noqa
'<li><a href="#some-header-with-markup">' # noqa
'Some Header with markup.</a></li>\n' # noqa
'</ul>\n' # noqa
'</div>\n'
)
self.assertEqual(self.md.toc_tokens, [
{'level': 1, 'id': 'some-header-with-markup', 'name': 'Some Header with markup.', 'children': []},
])

def testAnchorLink(self):
""" Test TOC Anchorlink. """
Expand Down Expand Up @@ -942,40 +998,38 @@ def testTitle(self):
def testWithAttrList(self):
    """ Test TOC with attr_list Extension. """
    md = markdown.Markdown(extensions=['toc', 'attr_list'])
    # Header 4 carries raw HTML inside its `data-toc-label`; the tags must not
    # show up in either the rendered TOC or the `toc_tokens` names.
    text = ('# Header 1\n\n'
            '## Header 2 { #foo }\n\n'
            '## Header 3 { data-toc-label="Foo Bar"}\n\n'
            '# Header 4 { data-toc-label="Foo <b>Baz</b>" }')
    self.assertEqual(
        md.convert(text),
        '<h1 id="header-1">Header 1</h1>\n'
        '<h2 id="foo">Header 2</h2>\n'
        '<h2 id="header-3">Header 3</h2>\n'
        '<h1 id="header-4">Header 4</h1>'
    )
    # The nested indentation of the string pieces mirrors the HTML structure.
    self.assertEqual(
        md.toc,
        '<div class="toc">\n'
          '<ul>\n'                                             # noqa
            '<li><a href="#header-1">Header 1</a>'             # noqa
              '<ul>\n'                                         # noqa
                '<li><a href="#foo">Header 2</a></li>\n'       # noqa
                '<li><a href="#header-3">Foo Bar</a></li>\n'   # noqa
              '</ul>\n'                                        # noqa
            '</li>\n'                                          # noqa
            '<li><a href="#header-4">Foo Baz</a></li>\n'       # noqa
          '</ul>\n'                                            # noqa
        '</div>\n'
    )
    self.assertEqual(md.toc_tokens, [
        {'level': 1, 'id': 'header-1', 'name': 'Header 1', 'children': [
            {'level': 2, 'id': 'foo', 'name': 'Header 2', 'children': []},
            {'level': 2, 'id': 'header-3', 'name': 'Foo Bar', 'children': []}
        ]},
        {'level': 1, 'id': 'header-4', 'name': 'Foo Baz', 'children': []},
    ])

def testUniqueFunc(self):
""" Test 'unique' function. """
Expand Down

0 comments on commit ccf56ed

Please sign in to comment.