diff options
Diffstat (limited to 'markdown/extensions/tables.py')
-rw-r--r-- | markdown/extensions/tables.py | 223 |
1 files changed, 181 insertions, 42 deletions
diff --git a/markdown/extensions/tables.py b/markdown/extensions/tables.py index 1d3c920..c8b1024 100644 --- a/markdown/extensions/tables.py +++ b/markdown/extensions/tables.py @@ -1,44 +1,88 @@ -#!/usr/bin/env Python """ Tables Extension for Python-Markdown ==================================== Added parsing of tables to Python-Markdown. -A simple example: +See <https://Python-Markdown.github.io/extensions/tables> +for documentation. - First Header | Second Header - ------------- | ------------- - Content Cell | Content Cell - Content Cell | Content Cell +Original code Copyright 2009 [Waylan Limberg](http://achinghead.com) + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) -Copyright 2009 - [Waylan Limberg](http://achinghead.com) """ -import markdown -from markdown import etree + +from . import Extension +from ..blockprocessors import BlockProcessor +import xml.etree.ElementTree as etree +import re +PIPE_NONE = 0 +PIPE_LEFT = 1 +PIPE_RIGHT = 2 -class TableProcessor(markdown.blockprocessors.BlockProcessor): +class TableProcessor(BlockProcessor): """ Process Tables. """ + RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))') + RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$') + + def __init__(self, parser, config): + self.border = False + self.separator = '' + self.config = config + + super().__init__(parser) + def test(self, parent, block): - rows = block.split('\n') - return (len(rows) > 2 and '|' in rows[0] and - '|' in rows[1] and '-' in rows[1] and - rows[1][0] in ['|', ':', '-']) + """ + Ensure first two rows (column header and separator row) are valid table rows. + + Keep border check and separator row do avoid repeating the work. + """ + is_table = False + rows = [row.strip(' ') for row in block.split('\n')] + if len(rows) > 1: + header0 = rows[0] + self.border = PIPE_NONE + if header0.startswith('|'): + self.border |= PIPE_LEFT + if self.RE_END_BORDER.search(header0) is not None: + self.border |= PIPE_RIGHT + row = self._split_row(header0) + row0_len = len(row) + is_table = row0_len > 1 + + # Each row in a single column table needs at least one pipe. + if not is_table and row0_len == 1 and self.border: + for index in range(1, len(rows)): + is_table = rows[index].startswith('|') + if not is_table: + is_table = self.RE_END_BORDER.search(rows[index]) is not None + if not is_table: + break + + if is_table: + row = self._split_row(rows[1]) + is_table = (len(row) == row0_len) and set(''.join(row)) <= set('|:- ') + if is_table: + self.separator = row + + return is_table def run(self, parent, blocks): """ Parse a table block and build table. """ block = blocks.pop(0).split('\n') - header = block[:2] - rows = block[2:] - # Get format type (bordered by pipes or not) - border = False - if header[0].startswith('|'): - border = True + header = block[0].strip(' ') + rows = [] if len(block) < 3 else block[2:] + # Get alignment of columns align = [] - for c in self._split_row(header[1], border): + for c in self.separator: + c = c.strip(' ') if c.startswith(':') and c.endswith(':'): align.append('center') elif c.startswith(':'): @@ -47,51 +91,146 @@ class TableProcessor(markdown.blockprocessors.BlockProcessor): align.append('right') else: align.append(None) + # Build table table = etree.SubElement(parent, 'table') thead = etree.SubElement(table, 'thead') - self._build_row(header[0], thead, align, border) + self._build_row(header, thead, align) tbody = etree.SubElement(table, 'tbody') - for row in rows: - self._build_row(row, tbody, align, border) + if len(rows) == 0: + # Handle empty table + self._build_empty_row(tbody, align) + else: + for row in rows: + self._build_row(row.strip(' '), tbody, align) + + def _build_empty_row(self, parent, align): + """Build an empty row.""" + tr = etree.SubElement(parent, 'tr') + count = len(align) + while count: + etree.SubElement(tr, 'td') + count -= 1 - def _build_row(self, row, parent, align, border): + def _build_row(self, row, parent, align): """ Given a row of text, build table cells. """ tr = etree.SubElement(parent, 'tr') tag = 'td' if parent.tag == 'thead': tag = 'th' - cells = self._split_row(row, border) - # We use align here rather than cells to ensure every row + cells = self._split_row(row) + # We use align here rather than cells to ensure every row # contains the same number of columns. for i, a in enumerate(align): c = etree.SubElement(tr, tag) try: - c.text = cells[i].strip() - except IndexError: + c.text = cells[i].strip(' ') + except IndexError: # pragma: no cover c.text = "" if a: - c.set('align', a) + if self.config['use_align_attribute']: + c.set('align', a) + else: + c.set('style', f'text-align: {a};') - def _split_row(self, row, border): + def _split_row(self, row): """ split a row of text into list of cells. """ - if border: + if self.border: if row.startswith('|'): row = row[1:] - if row.endswith('|'): - row = row[:-1] - return row.split('|') + row = self.RE_END_BORDER.sub('', row) + return self._split(row) + def _split(self, row): + """ split a row of text with some code into a list of cells. """ + elements = [] + pipes = [] + tics = [] + tic_points = [] + tic_region = [] + good_pipes = [] + + # Parse row + # Throw out \\, and \| + for m in self.RE_CODE_PIPES.finditer(row): + # Store ` data (len, start_pos, end_pos) + if m.group(2): + # \`+ + # Store length of each tic group: subtract \ + tics.append(len(m.group(2)) - 1) + # Store start of group, end of group, and escape length + tic_points.append((m.start(2), m.end(2) - 1, 1)) + elif m.group(3): + # `+ + # Store length of each tic group + tics.append(len(m.group(3))) + # Store start of group, end of group, and escape length + tic_points.append((m.start(3), m.end(3) - 1, 0)) + # Store pipe location + elif m.group(5): + pipes.append(m.start(5)) + + # Pair up tics according to size if possible + # Subtract the escape length *only* from the opening. + # Walk through tic list and see if tic has a close. + # Store the tic region (start of region, end of region). + pos = 0 + tic_len = len(tics) + while pos < tic_len: + try: + tic_size = tics[pos] - tic_points[pos][2] + if tic_size == 0: + raise ValueError + index = tics[pos + 1:].index(tic_size) + 1 + tic_region.append((tic_points[pos][0], tic_points[pos + index][1])) + pos += index + 1 + except ValueError: + pos += 1 -class TableExtension(markdown.Extension): + # Resolve pipes. Check if they are within a tic pair region. + # Walk through pipes comparing them to each region. + # - If pipe position is less that a region, it isn't in a region + # - If it is within a region, we don't want it, so throw it out + # - If we didn't throw it out, it must be a table pipe + for pipe in pipes: + throw_out = False + for region in tic_region: + if pipe < region[0]: + # Pipe is not in a region + break + elif region[0] <= pipe <= region[1]: + # Pipe is within a code region. Throw it out. + throw_out = True + break + if not throw_out: + good_pipes.append(pipe) + + # Split row according to table delimiters. + pos = 0 + for pipe in good_pipes: + elements.append(row[pos:pipe]) + pos = pipe + 1 + elements.append(row[pos:]) + return elements + + +class TableExtension(Extension): """ Add tables to Markdown. """ - def extendMarkdown(self, md, md_globals): + def __init__(self, **kwargs): + self.config = { + 'use_align_attribute': [False, 'True to use align attribute instead of style.'], + } + + super().__init__(**kwargs) + + def extendMarkdown(self, md): """ Add an instance of TableProcessor to BlockParser. """ - md.parser.blockprocessors.add('table', - TableProcessor(md.parser), - '<hashheader') + if '|' not in md.ESCAPED_CHARS: + md.ESCAPED_CHARS.append('|') + processor = TableProcessor(md.parser, self.getConfigs()) + md.parser.blockprocessors.register(processor, 'table', 75) -def makeExtension(configs={}): - return TableExtension(configs=configs) +def makeExtension(**kwargs): # pragma: no cover + return TableExtension(**kwargs) |