Skip to content

Commit

Permalink
Properly parse code spans in md_in_html (Python-Markdown#1069)
Browse files Browse the repository at this point in the history
This reverts part of 2766698 and re-implements handling 
of tails in the same manner as the core.

Also, ensure line_offset doesn't raise an error on bad input
(see Python-Markdown#1066) and properly handle script tags in code
spans (same as in the core).

Fixes Python-Markdown#1068.
  • Loading branch information
waylan authored Nov 18, 2020
1 parent 447da66 commit 81cc5b8
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 15 deletions.
4 changes: 4 additions & 0 deletions docs/change_log/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ title: Change Log
Python-Markdown Change Log
=========================

Under development: version 3.3.4 (a bug-fix release).

* Properly parse code spans in md_in_html (#1069).

Oct 25, 2020: version 3.3.3 (a bug-fix release).

* Unify all block-level tags (#1047).
Expand Down
27 changes: 14 additions & 13 deletions markdown/extensions/md_in_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from ..preprocessors import Preprocessor
from ..postprocessors import RawHtmlPostprocessor
from .. import util
from ..htmlparser import HTMLExtractor
from ..htmlparser import HTMLExtractor, blank_line_re
import xml.etree.ElementTree as etree


Expand Down Expand Up @@ -85,17 +85,9 @@ def get_state(self, tag, attrs):
else: # pragma: no cover
return None

def at_line_start(self):
"""At line start."""

# Start from the parent class's answer.
value = super().at_line_start()
# If the parent says "no", still report True when the most recently
# collected chunk in `self.cleandoc` ends with a newline.
if not value and self.cleandoc and self.cleandoc[-1].endswith('\n'):
value = True
return value

def handle_starttag(self, tag, attrs):
# Handle tags that should always be empty and do not specify a closing tag
if tag in self.empty_tags:
if tag in self.empty_tags and (self.at_line_start() or self.intail):
attrs = {key: value if value is not None else key for key, value in attrs}
if "markdown" in attrs:
attrs.pop('markdown')
Expand All @@ -106,13 +98,12 @@ def handle_starttag(self, tag, attrs):
self.handle_empty_tag(data, True)
return

if tag in self.block_level_tags:
if tag in self.block_level_tags and (self.at_line_start() or self.intail):
# Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
# Convert to `{'checked': 'checked'}`.
attrs = {key: value if value is not None else key for key, value in attrs}
state = self.get_state(tag, attrs)

if self.inraw or (state in [None, 'off'] and not self.mdstack) or not self.at_line_start():
if self.inraw or (state in [None, 'off'] and not self.mdstack):
# fall back to default behavior
attrs.pop('markdown', None)
super().handle_starttag(tag, attrs)
Expand All @@ -134,6 +125,9 @@ def handle_starttag(self, tag, attrs):
self.handle_data(self.md.htmlStash.store(text))
else:
self.handle_data(text)
if tag in self.CDATA_CONTENT_ELEMENTS:
# This is presumably a standalone tag in a code span (see #1036).
self.clear_cdata_mode()

def handle_endtag(self, tag):
if tag in self.block_level_tags:
Expand All @@ -159,6 +153,11 @@ def handle_endtag(self, tag):
self.cleandoc.append(self.md.htmlStash.store(element))
self.cleandoc.append('\n\n')
self.state = []
# Check if element has a tail
if not blank_line_re.match(
self.rawdata[self.line_offset + self.offset + len(self.get_endtag_text(tag)):]):
# More content exists after endtag.
self.intail = True
else:
# Treat orphan closing tag as a span level tag.
text = self.get_endtag_text(tag)
Expand Down Expand Up @@ -191,6 +190,8 @@ def handle_startendtag(self, tag, attrs):
self.handle_empty_tag(data, is_block=self.md.is_block_level(tag))

def handle_data(self, data):
if self.intail and '\n' in data:
self.intail = False
if self.inraw or not self.mdstack:
super().handle_data(data)
else:
Expand Down
10 changes: 8 additions & 2 deletions markdown/htmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,14 @@ def close(self):
@property
def line_offset(self):
"""Returns char index in self.rawdata for the start of the current line. """
if self.lineno > 1:
return re.match(r'([^\n]*\n){{{}}}'.format(self.lineno-1), self.rawdata).end()
if self.lineno > 1 and '\n' in self.rawdata:
m = re.match(r'([^\n]*\n){{{}}}'.format(self.lineno-1), self.rawdata)
if m:
return m.end()
else: # pragma: no cover
# Value of self.lineno must exceed total number of lines.
# Find index of beginning of last line.
return self.rawdata.rfind('\n')
return 0

def at_line_start(self):
Expand Down
66 changes: 66 additions & 0 deletions tests/test_syntax/extensions/test_md_in_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,72 @@ def test_md1_div_linebreaks(self):
)
)

def test_md1_code_span(self):
# A backtick code span holding a complete `<h1>...</h1>` element, on its own
# line inside `<div markdown="1">`, must come out as an escaped `<code>` span
# wrapped in a paragraph, with the `markdown` attribute gone from the div.
self.assertMarkdownRenders(
self.dedent(
"""
<div markdown="1">
`<h1>code span</h1>`
</div>
"""
),
self.dedent(
"""
<div>
<p><code>&lt;h1&gt;code span&lt;/h1&gt;</code></p>
</div>
"""
)
)

def test_md1_code_span_oneline(self):
# Same content as the multi-line code span case, but the opening div, the
# code span and the closing div all sit on a single input line. The expected
# output is the same multi-line `<div>` / `<p><code>` structure.
self.assertMarkdownRenders(
'<div markdown="1">`<h1>code span</h1>`</div>',
self.dedent(
"""
<div>
<p><code>&lt;h1&gt;code span&lt;/h1&gt;</code></p>
</div>
"""
)
)

def test_md1_code_span_unclosed(self):
# The code span contains only an opening `<p>` tag with no matching close
# tag. It must still be rendered as escaped text inside `<code>`.
self.assertMarkdownRenders(
self.dedent(
"""
<div markdown="1">
`<p>`
</div>
"""
),
self.dedent(
"""
<div>
<p><code>&lt;p&gt;</code></p>
</div>
"""
)
)

def test_md1_code_span_script_tag(self):
# A lone `<script>` tag inside a code span must be rendered as escaped text
# inside `<code>`, and the closing `</div>` after it must still be emitted.
self.assertMarkdownRenders(
self.dedent(
"""
<div markdown="1">
`<script>`
</div>
"""
),
self.dedent(
"""
<div>
<p><code>&lt;script&gt;</code></p>
</div>
"""
)
)

def test_md1_div_blank_lines(self):
self.assertMarkdownRenders(
self.dedent(
Expand Down

0 comments on commit 81cc5b8

Please sign in to comment.