diff --git a/docs/change_log/index.md b/docs/change_log/index.md index 7edb2b9a6..0069c2225 100644 --- a/docs/change_log/index.md +++ b/docs/change_log/index.md @@ -5,6 +5,7 @@ Python-Markdown Change Log Under development: version 3.3.4 (a bug-fix release). +* Properly parse unclosed tags in code spans (#1066). * Properly parse processing instructions in md_in_html (#1070). * Properly parse code spans in md_in_html (#1069). diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 6d2a0e78d..86cf00d79 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -206,6 +206,26 @@ def handle_empty_tag(self, data, is_block): else: self.handle_data(self.md.htmlStash.store(data)) + def parse_pi(self, i): + if self.at_line_start() or self.intail or self.mdstack: + # The same override exists in HTMLExtractor without the check + # for mdstack. Therefore, use HTMLExtractor's parent instead. + return super(HTMLExtractor, self).parse_pi(i) + # This is not the beginning of a raw block so treat as plain data + # and avoid consuming any tags which may follow (see #1066). + self.handle_data('\x00]* # tag name <= added backtick here + (?:[\s/]* # optional whitespace before attribute name + (?:(?<=['"\s/])[^`\s/>][^\s/=>]* # attribute name <= added backtick here + (?:\s*=+\s* # value indicator + (?:'[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^`>\s]* # bare value <= added backtick here + ) + (?:\s*,)* # possibly followed by a comma + )?(?:\s|/(?!>))* + )* + )? + \s* # trailing whitespace +""", re.VERBOSE) # Match a blank line at the start of a block of text (two newlines). # The newlines may be preceded by additional whitespace. @@ -230,6 +246,22 @@ def unknown_decl(self, data): end = ']]>' if data.startswith('CDATA[') else ']>' self.handle_empty_tag('<foo

' ) + def test_raw_unclosed_tag_in_code_span(self): + self.assertMarkdownRenders( + self.dedent( + """ + ` + hello + + """ + ), + self.dedent( + """ +

<div.

+
+ hello +
+ """ + ) + ) + + def test_raw_unclosed_tag_in_code_span_space(self): + self.assertMarkdownRenders( + self.dedent( + """ + `
+ hello +
+ """ + ), + self.dedent( + """ +

<div.

+
+ hello +
+ """ + ) + ) + def test_raw_attributes(self): self.assertMarkdownRenders( '

text

', @@ -1073,6 +1115,27 @@ def test_raw_processing_instruction_indented(self): ) ) + def test_raw_processing_instruction_code_span(self): + self.assertMarkdownRenders( + self.dedent( + """ + ` + foo + + """ + ), + self.dedent( + """ +

<?php

+
+ foo +
+ """ + ) + ) + def test_raw_declaration_one_line(self): self.assertMarkdownRenders( '', @@ -1110,6 +1173,27 @@ def test_raw_multiline_declaration(self): ) ) + def test_raw_declaration_code_span(self): + self.assertMarkdownRenders( + self.dedent( + """ + ` + foo + + """ + ), + self.dedent( + """ +

<!

+
+ foo +
+ """ + ) + ) + def test_raw_cdata_one_line(self): self.assertMarkdownRenders( '"); ]]>', @@ -1190,6 +1274,27 @@ def test_raw_cdata_indented(self): ) ) + def test_raw_cdata_code_span(self): + self.assertMarkdownRenders( + self.dedent( + """ + ` + foo + + """ + ), + self.dedent( + """ +

<![

+
+ foo +
+ """ + ) + ) + def test_charref(self): self.assertMarkdownRenders( '§',