Skip to content

Commit

Permalink
Tables: Improvements (Python-Markdown#530)
Browse files Browse the repository at this point in the history
Tables now handle escaped pipes when testing, in table borders, and in
the inline content.  To achieve this properly, a bug related to appending
escaped chars to the Markdown class had to be fixed.  Now appended chars
only appear in the current instance.  Lastly, the first backtick in a
table can be escaped, rounding out the last corner case.
  • Loading branch information
facelessuser authored and waylan committed Jan 19, 2017
1 parent b52293b commit c70b2c4
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 30 deletions.
6 changes: 3 additions & 3 deletions markdown/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,6 @@ class Markdown(object):
'xhtml5': to_xhtml_string,
}

ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
'(', ')', '>', '#', '+', '-', '.', '!']

def __init__(self, *args, **kwargs):
"""
Creates a new Markdown instance.
Expand Down Expand Up @@ -147,6 +144,9 @@ def __init__(self, *args, **kwargs):
'deprecated along with "safe_mode".',
DeprecationWarning)

self.ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
'(', ')', '>', '#', '+', '-', '.', '!']

self.registeredExtensions = []
self.docType = ""
self.stripTopLevelTags = True
Expand Down
79 changes: 53 additions & 26 deletions markdown/extensions/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,28 +26,43 @@
class TableProcessor(BlockProcessor):
""" Process Tables. """

RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(`+)|(\\\|)|(\|))')
RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')

def __init__(self, parser):
self.border = False
self.separator = ''
super(TableProcessor, self).__init__(parser)

def test(self, parent, block):
rows = block.split('\n')
return (len(rows) > 1 and '|' in rows[0] and
'|' in rows[1] and '-' in rows[1] and
rows[1].strip()[0] in ['|', ':', '-'] and
set(rows[1]) <= set('|:- '))
"""
Ensure first two rows (column header and separator row) are valid table rows.
Keep the border check and separator row to avoid repeating the work.
"""
is_table = False
header = [row.strip() for row in block.split('\n')[0:2]]
if len(header) == 2:
self.border = header[0].startswith('|')
row = self._split_row(header[0])
is_table = len(row) > 1

if is_table:
row = self._split_row(header[1])
is_table = len(row) > 1 and set(''.join(row)) <= set('|:- ')
if is_table:
self.separator = row
return is_table

def run(self, parent, blocks):
""" Parse a table block and build table. """
block = blocks.pop(0).split('\n')
header = block[0].strip()
seperator = block[1].strip()
rows = [] if len(block) < 3 else block[2:]
# Get format type (bordered by pipes or not)
border = False
if header.startswith('|'):
border = True

# Get alignment of columns
align = []
for c in self._split_row(seperator, border):
for c in self.separator:
c = c.strip()
if c.startswith(':') and c.endswith(':'):
align.append('center')
Expand All @@ -57,21 +72,22 @@ def run(self, parent, blocks):
align.append('right')
else:
align.append(None)

# Build table
table = etree.SubElement(parent, 'table')
thead = etree.SubElement(table, 'thead')
self._build_row(header, thead, align, border)
self._build_row(header, thead, align)
tbody = etree.SubElement(table, 'tbody')
for row in rows:
self._build_row(row.strip(), tbody, align, border)
self._build_row(row.strip(), tbody, align)

def _build_row(self, row, parent, align, border):
def _build_row(self, row, parent, align):
""" Given a row of text, build table cells. """
tr = etree.SubElement(parent, 'tr')
tag = 'td'
if parent.tag == 'thead':
tag = 'th'
cells = self._split_row(row, border)
cells = self._split_row(row)
# We use align here rather than cells to ensure every row
# contains the same number of columns.
for i, a in enumerate(align):
Expand All @@ -83,13 +99,12 @@ def _build_row(self, row, parent, align, border):
if a:
c.set('align', a)

def _split_row(self, row, border):
def _split_row(self, row):
""" split a row of text into list of cells. """
if border:
if self.border:
if row.startswith('|'):
row = row[1:]
if row.endswith('|'):
row = row[:-1]
row = self.RE_END_BORDER.sub('', row)
return self._split(row)

def _split(self, row):
Expand All @@ -106,23 +121,33 @@ def _split(self, row):
for m in self.RE_CODE_PIPES.finditer(row):
# Store ` data (len, start_pos, end_pos)
if m.group(2):
# \`+
# Store length of each tic group: subtract \
tics.append(len(m.group(2)) - 1)
# Store start of group, end of group, and escape length
tic_points.append((m.start(2), m.end(2) - 1, 1))
elif m.group(3):
# `+
# Store length of each tic group
tics.append(len(m.group(2)))
# Store start and end of tic group
tic_points.append((m.start(2), m.end(2) - 1))
tics.append(len(m.group(3)))
# Store start of group, end of group, and escape length
tic_points.append((m.start(3), m.end(3) - 1, 0))
# Store pipe location
elif m.group(4):
pipes.append(m.start(4))
elif m.group(5):
pipes.append(m.start(5))

# Pair up tics according to size if possible
# Subtract the escape length *only* from the opening.
# Walk through tic list and see if tic has a close.
# Store the tic region (start of region, end of region).
pos = 0
tic_len = len(tics)
while pos < tic_len:
try:
index = tics[pos + 1:].index(tics[pos]) + 1
tic_size = tics[pos] - tic_points[pos][2]
if tic_size == 0:
raise ValueError
index = tics[pos + 1:].index(tic_size) + 1
tic_region.append((tic_points[pos][0], tic_points[pos + index][1]))
pos += index + 1
except ValueError:
Expand Down Expand Up @@ -160,6 +185,8 @@ class TableExtension(Extension):

def extendMarkdown(self, md, md_globals):
""" Add an instance of TableProcessor to BlockParser. """
if '|' not in md.ESCAPED_CHARS:
md.ESCAPED_CHARS.append('|')
md.parser.blockprocessors.add('table',
TableProcessor(md.parser),
'<hashheader')
Expand Down
74 changes: 73 additions & 1 deletion tests/extensions/extra/tables.html
Original file line number Diff line number Diff line change
Expand Up @@ -284,4 +284,76 @@ <h2>Table Tests</h2>
<td><code>\</code></td>
</tr>
</tbody>
</table>
</table>
<p>Only the first backtick can be escaped</p>
<table>
<thead>
<tr>
<th>Escaped</th>
<th>Bacticks</th>
</tr>
</thead>
<tbody>
<tr>
<td>`<code>\</code></td>
<td>``</td>
</tr>
</tbody>
</table>
<p>Test escaped pipes</p>
<table>
<thead>
<tr>
<th>Column 1</th>
<th>Column 2</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>|</code> |</td>
<td>Pipes are okay in code and escaped. |</td>
</tr>
</tbody>
</table>
<table>
<thead>
<tr>
<th>Column 1</th>
<th>Column 2</th>
</tr>
</thead>
<tbody>
<tr>
<td>row1</td>
<td>row1 |</td>
</tr>
<tr>
<td>row2</td>
<td>row2</td>
</tr>
</tbody>
</table>
<p>Test header escapes</p>
<table>
<thead>
<tr>
<th><code>`\</code> |</th>
<th><code>\</code> |</th>
</tr>
</thead>
<tbody>
<tr>
<td>row1</td>
<td>row1</td>
</tr>
<tr>
<td>row2</td>
<td>row2</td>
</tr>
</tbody>
</table>
<p>Escaped pipes in format row should not be a table</p>
<p>| Column1 | Column2 |
| ------- || ------- |
| row1 | row1 |
| row2 | row2 |</p>
31 changes: 31 additions & 0 deletions tests/extensions/extra/tables.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,34 @@ Odd backticks | Even backticks
Escapes | More Escapes
------- | ------
`` `\`` | `\`

Only the first backtick can be escaped

Escaped | Bacticks
------- | ------
\`` \` | \`\`

Test escaped pipes

Column 1 | Column 2
-------- | --------
`|` \| | Pipes are okay in code and escaped. \|

| Column 1 | Column 2 |
| -------- | -------- |
| row1 | row1 \|
| row2 | row2 |

Test header escapes

| `` `\`` \| | `\` \|
| ---------- | ---- |
| row1 | row1 |
| row2 | row2 |

Escaped pipes in format row should not be a table

| Column1 | Column2 |
| ------- \|| ------- |
| row1 | row1 |
| row2 | row2 |
12 changes: 12 additions & 0 deletions tests/test_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,3 +758,15 @@ def testExtensonConfigOptionBadFormat(self):
"""
self.create_config_file(config)
self.assertRaises(yaml.YAMLError, parse_options, ['-c', self.tempfile])


class TestEscapeAppend(unittest.TestCase):
    """ Tests escape character append. """

    def testAppend(self):
        """ Test that appended escapes are only in the current instance. """
        md = markdown.Markdown()
        md.ESCAPED_CHARS.append('|')
        # assertIn/assertNotIn report the list contents on failure, unlike
        # comparing a bare boolean against True with assertEqual.
        self.assertIn('|', md.ESCAPED_CHARS)
        # A second instance created afterwards must not contain the
        # character appended to the first instance's list.
        md2 = markdown.Markdown()
        self.assertNotIn('|', md2.ESCAPED_CHARS)

0 comments on commit c70b2c4

Please sign in to comment.