diff options
Diffstat (limited to 'markdown/blockprocessors.py')
-rw-r--r-- | markdown/blockprocessors.py | 423 |
1 files changed, 293 insertions, 130 deletions
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index 7d3b137..3d0ff86 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -1,23 +1,64 @@ """ +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). + CORE MARKDOWN BLOCKPARSER -============================================================================= +=========================================================================== -This parser handles basic parsing of Markdown blocks. It doesn't concern itself -with inline elements such as **bold** or *italics*, but rather just catches -blocks, lists, quotes, etc. +This parser handles basic parsing of Markdown blocks. It doesn't concern +itself with inline elements such as **bold** or *italics*, but rather just +catches blocks, lists, quotes, etc. -The BlockParser is made up of a bunch of BlockProssors, each handling a +The BlockParser is made up of a bunch of BlockProcessors, each handling a different type of block. Extensions may add/replace/remove BlockProcessors as they need to alter how markdown blocks are parsed. - """ +import logging import re -import markdown +import xml.etree.ElementTree as etree +from . import util +from .blockparser import BlockParser + +logger = logging.getLogger('MARKDOWN') + + +def build_block_parser(md, **kwargs): + """ Build the default block parser used by Markdown. """ + parser = BlockParser(md) + parser.blockprocessors.register(EmptyBlockProcessor(parser), 'empty', 100) + parser.blockprocessors.register(ListIndentProcessor(parser), 'indent', 90) + parser.blockprocessors.register(CodeBlockProcessor(parser), 'code', 80) + parser.blockprocessors.register(HashHeaderProcessor(parser), 'hashheader', 70) + parser.blockprocessors.register(SetextHeaderProcessor(parser), 'setextheader', 60) + parser.blockprocessors.register(HRProcessor(parser), 'hr', 50) + parser.blockprocessors.register(OListProcessor(parser), 'olist', 40) + parser.blockprocessors.register(UListProcessor(parser), 'ulist', 30) + parser.blockprocessors.register(BlockQuoteProcessor(parser), 'quote', 20) + parser.blockprocessors.register(ReferenceProcessor(parser), 'reference', 15) + parser.blockprocessors.register(ParagraphProcessor(parser), 'paragraph', 10) + return parser + class BlockProcessor: - """ Base class for block processors. - + """ Base class for block processors. + Each subclass will provide the methods below to work with the source and tree. Each processor will need to define it's own ``test`` and ``run`` methods. The ``test`` method should return True or False, to indicate @@ -26,8 +67,9 @@ class BlockProcessor: """ - def __init__(self, parser=None): + def __init__(self, parser): self.parser = parser + self.tab_length = parser.md.tab_length def lastChild(self, parent): """ Return the last child of an etree element. """ @@ -36,13 +78,15 @@ class BlockProcessor: else: return None - def detab(self, text): + def detab(self, text, length=None): """ Remove a tab from the front of each line of the given text. """ + if length is None: + length = self.tab_length newtext = [] lines = text.split('\n') for line in lines: - if line.startswith(' '*markdown.TAB_LENGTH): - newtext.append(line[markdown.TAB_LENGTH:]) + if line.startswith(' ' * length): + newtext.append(line[length:]) elif not line.strip(): newtext.append('') else: @@ -53,37 +97,37 @@ class BlockProcessor: """ Remove a tab from front of lines but allowing dedented lines. """ lines = text.split('\n') for i in range(len(lines)): - if lines[i].startswith(' '*markdown.TAB_LENGTH*level): - lines[i] = lines[i][markdown.TAB_LENGTH*level:] + if lines[i].startswith(' '*self.tab_length*level): + lines[i] = lines[i][self.tab_length*level:] return '\n'.join(lines) def test(self, parent, block): - """ Test for block type. Must be overridden by subclasses. - - As the parser loops through processors, it will call the ``test`` method - on each to determine if the given block of text is of that type. This - method must return a boolean ``True`` or ``False``. The actual method of - testing is left to the needs of that particular block type. It could - be as simple as ``block.startswith(some_string)`` or a complex regular - expression. As the block type may be different depending on the parent - of the block (i.e. inside a list), the parent etree element is also - provided and may be used as part of the test. + """ Test for block type. Must be overridden by subclasses. + + As the parser loops through processors, it will call the ``test`` + method on each to determine if the given block of text is of that + type. This method must return a boolean ``True`` or ``False``. The + actual method of testing is left to the needs of that particular + block type. It could be as simple as ``block.startswith(some_string)`` + or a complex regular expression. As the block type may be different + depending on the parent of the block (i.e. inside a list), the parent + etree element is also provided and may be used as part of the test. Keywords: - + * ``parent``: A etree element which will be the parent of the block. - * ``block``: A block of text from the source which has been split at + * ``block``: A block of text from the source which has been split at blank lines. """ - pass + pass # pragma: no cover def run(self, parent, blocks): - """ Run processor. Must be overridden by subclasses. - + """ Run processor. Must be overridden by subclasses. + When the parser determines the appropriate type of a block, the parser will call the corresponding processor's ``run`` method. This method should parse the individual lines of the block and append them to - the etree. + the etree. Note that both the ``parent`` and ``etree`` keywords are pointers to instances of the objects which should be edited in place. Each @@ -99,12 +143,12 @@ class BlockProcessor: * ``parent``: A etree element which is the parent of the current block. * ``blocks``: A list of all remaining blocks of the document. """ - pass + pass # pragma: no cover class ListIndentProcessor(BlockProcessor): - """ Process children of list items. - + """ Process children of list items. + Example: * a list item process this part @@ -113,18 +157,19 @@ class ListIndentProcessor(BlockProcessor): """ - INDENT_RE = re.compile(r'^(([ ]{%s})+)'% markdown.TAB_LENGTH) ITEM_TYPES = ['li'] LIST_TYPES = ['ul', 'ol'] + def __init__(self, *args): + super().__init__(*args) + self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length) + def test(self, parent, block): - return block.startswith(' '*markdown.TAB_LENGTH) and \ - not self.parser.state.isstate('detabbed') and \ - (parent.tag in self.ITEM_TYPES or \ - (len(parent) and parent[-1] and \ - (parent[-1].tag in self.LIST_TYPES) - ) - ) + return block.startswith(' '*self.tab_length) and \ + not self.parser.state.isstate('detabbed') and \ + (parent.tag in self.ITEM_TYPES or + (len(parent) and parent[-1] is not None and + (parent[-1].tag in self.LIST_TYPES))) def run(self, parent, blocks): block = blocks.pop(0) @@ -133,8 +178,16 @@ class ListIndentProcessor(BlockProcessor): self.parser.state.set('detabbed') if parent.tag in self.ITEM_TYPES: - # The parent is already a li. Just parse the child block. - self.parser.parseBlocks(parent, [block]) + # It's possible that this parent has a 'ul' or 'ol' child list + # with a member. If that is the case, then that should be the + # parent. This is intended to catch the edge case of an indented + # list whose first member was parsed previous to this point + # see OListProcessor + if len(parent) and parent[-1].tag in self.LIST_TYPES: + self.parser.parseBlocks(parent[-1], [block]) + else: + # The parent is already a li. Just parse the child block. + self.parser.parseBlocks(parent, [block]) elif sibling.tag in self.ITEM_TYPES: # The sibling is a li. Use it as parent. self.parser.parseBlocks(sibling, [block]) @@ -143,8 +196,12 @@ class ListIndentProcessor(BlockProcessor): # Assume the last child li is the parent of this block. if sibling[-1].text: # If the parent li has text, that text needs to be moved to a p - block = '%s\n\n%s' % (sibling[-1].text, block) + # The p must be 'inserted' at beginning of list in the event + # that other children already exist i.e.; a nested sublist. + p = etree.Element('p') + p.text = sibling[-1].text sibling[-1].text = '' + sibling[-1].insert(0, p) self.parser.parseChunk(sibling[-1], block) else: self.create_item(sibling, block) @@ -152,15 +209,15 @@ class ListIndentProcessor(BlockProcessor): def create_item(self, parent, block): """ Create a new li and parse the block with it as the parent. """ - li = markdown.etree.SubElement(parent, 'li') + li = etree.SubElement(parent, 'li') self.parser.parseBlocks(li, [block]) - + def get_level(self, parent, block): """ Get level of indent based on list level. """ # Get indent level m = self.INDENT_RE.match(block) if m: - indent_level = len(m.group(1))/markdown.TAB_LENGTH + indent_level = len(m.group(1))/self.tab_length else: indent_level = 0 if self.parser.state.isstate('list'): @@ -172,7 +229,8 @@ class ListIndentProcessor(BlockProcessor): # Step through children of tree to find matching indent level. while indent_level > level: child = self.lastChild(parent) - if child and (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES): + if (child is not None and + (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES)): if child.tag in self.LIST_TYPES: level += 1 parent = child @@ -187,28 +245,30 @@ class CodeBlockProcessor(BlockProcessor): """ Process code blocks. """ def test(self, parent, block): - return block.startswith(' '*markdown.TAB_LENGTH) - + return block.startswith(' '*self.tab_length) + def run(self, parent, blocks): sibling = self.lastChild(parent) block = blocks.pop(0) theRest = '' - if sibling and sibling.tag == "pre" and len(sibling) \ - and sibling[0].tag == "code": + if (sibling is not None and sibling.tag == "pre" and + len(sibling) and sibling[0].tag == "code"): # The previous block was a code block. As blank lines do not start # new code blocks, append this block to the previous, adding back # linebreaks removed from the split into a list. code = sibling[0] block, theRest = self.detab(block) - code.text = markdown.AtomicString('%s\n%s\n' % (code.text, block.rstrip())) + code.text = util.AtomicString( + '{}\n{}\n'.format(code.text, util.code_escape(block.rstrip())) + ) else: # This is a new codeblock. Create the elements and insert text. - pre = markdown.etree.SubElement(parent, 'pre') - code = markdown.etree.SubElement(pre, 'code') + pre = etree.SubElement(parent, 'pre') + code = etree.SubElement(pre, 'code') block, theRest = self.detab(block) - code.text = markdown.AtomicString('%s\n' % block.rstrip()) + code.text = util.AtomicString('%s\n' % util.code_escape(block.rstrip())) if theRest: - # This block contained unindented line(s) after the first indented + # This block contained unindented line(s) after the first indented # line. Insert these lines as the first block of the master blocks # list for future processing. blocks.insert(0, theRest) @@ -219,27 +279,31 @@ class BlockQuoteProcessor(BlockProcessor): RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)') def test(self, parent, block): - return bool(self.RE.search(block)) + return bool(self.RE.search(block)) and not util.nearing_recursion_limit() def run(self, parent, blocks): block = blocks.pop(0) m = self.RE.search(block) if m: - before = block[:m.start()] # Lines before blockquote - # Pass lines before blockquote in recursively for parsing forst. + before = block[:m.start()] # Lines before blockquote + # Pass lines before blockquote in recursively for parsing first. self.parser.parseBlocks(parent, [before]) - # Remove ``> `` from begining of each line. - block = '\n'.join([self.clean(line) for line in - block[m.start():].split('\n')]) + # Remove ``> `` from beginning of each line. + block = '\n'.join( + [self.clean(line) for line in block[m.start():].split('\n')] + ) sibling = self.lastChild(parent) - if sibling and sibling.tag == "blockquote": + if sibling is not None and sibling.tag == "blockquote": # Previous block was a blockquote so set that as this blocks parent quote = sibling else: # This is a new blockquote. Create a new parent element. - quote = markdown.etree.SubElement(parent, 'blockquote') + quote = etree.SubElement(parent, 'blockquote') # Recursively parse block with blockquote as parent. + # change parser state so blockquotes embedded in lists use p tags + self.parser.state.set('blockquote') self.parser.parseChunk(quote, block) + self.parser.state.reset() def clean(self, line): """ Remove ``>`` from beginning of a line. """ @@ -251,16 +315,31 @@ class BlockQuoteProcessor(BlockProcessor): else: return line + class OListProcessor(BlockProcessor): """ Process ordered list blocks. """ TAG = 'ol' - # Detect an item (``1. item``). ``group(1)`` contains contents of item. - RE = re.compile(r'^[ ]{0,3}\d+\.[ ]+(.*)') - # Detect items on secondary lines. they can be of either list type. - CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ]+(.*)') - # Detect indented (nested) items of either type - INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ]+.*') + # The integer (python string) with which the lists starts (default=1) + # Eg: If list is initialized as) + # 3. Item + # The ol tag will get starts="3" attribute + STARTSWITH = '1' + # Lazy ol - ignore startswith + LAZY_OL = True + # List of allowed sibling tags. + SIBLING_TAGS = ['ol', 'ul'] + + def __init__(self, parser): + super().__init__(parser) + # Detect an item (``1. item``). ``group(1)`` contains contents of item. + self.RE = re.compile(r'^[ ]{0,%d}\d+\.[ ]+(.*)' % (self.tab_length - 1)) + # Detect items on secondary lines. they can be of either list type. + self.CHILD_RE = re.compile(r'^[ ]{0,%d}((\d+\.)|[*+-])[ ]+(.*)' % + (self.tab_length - 1)) + # Detect indented (nested) items of either type + self.INDENT_RE = re.compile(r'^[ ]{%d,%d}((\d+\.)|[*+-])[ ]+.*' % + (self.tab_length, self.tab_length * 2 - 1)) def test(self, parent, block): return bool(self.RE.match(block)) @@ -269,33 +348,58 @@ class OListProcessor(BlockProcessor): # Check fr multiple items in one block. items = self.get_items(blocks.pop(0)) sibling = self.lastChild(parent) - if sibling and sibling.tag in ['ol', 'ul']: + + if sibling is not None and sibling.tag in self.SIBLING_TAGS: # Previous block was a list item, so set that as parent lst = sibling - # make sure previous item is in a p. - if len(lst) and lst[-1].text and not len(lst[-1]): - p = markdown.etree.SubElement(lst[-1], 'p') + # make sure previous item is in a p- if the item has text, + # then it isn't in a p + if lst[-1].text: + # since it's possible there are other children for this + # sibling, we can't just SubElement the p, we need to + # insert it as the first item. + p = etree.Element('p') p.text = lst[-1].text lst[-1].text = '' + lst[-1].insert(0, p) + # if the last item has a tail, then the tail needs to be put in a p + # likely only when a header is not followed by a blank line + lch = self.lastChild(lst[-1]) + if lch is not None and lch.tail: + p = etree.SubElement(lst[-1], 'p') + p.text = lch.tail.lstrip() + lch.tail = '' + # parse first block differently as it gets wrapped in a p. - li = markdown.etree.SubElement(lst, 'li') + li = etree.SubElement(lst, 'li') self.parser.state.set('looselist') firstitem = items.pop(0) self.parser.parseBlocks(li, [firstitem]) self.parser.state.reset() + elif parent.tag in ['ol', 'ul']: + # this catches the edge case of a multi-item indented list whose + # first item is in a blank parent-list item: + # * * subitem1 + # * subitem2 + # see also ListIndentProcessor + lst = parent else: # This is a new list so create parent with appropriate tag. - lst = markdown.etree.SubElement(parent, self.TAG) + lst = etree.SubElement(parent, self.TAG) + # Check if a custom start integer is set + if not self.LAZY_OL and self.STARTSWITH != '1': + lst.attrib['start'] = self.STARTSWITH + self.parser.state.set('list') # Loop through items in block, recursively parsing each with the # appropriate parent. for item in items: - if item.startswith(' '*markdown.TAB_LENGTH): + if item.startswith(' '*self.tab_length): # Item is indented. Parse with last item as parent self.parser.parseBlocks(lst[-1], [item]) else: # New item. Create li and parse with it as parent - li = markdown.etree.SubElement(lst, 'li') + li = etree.SubElement(lst, 'li') self.parser.parseBlocks(li, [item]) self.parser.state.reset() @@ -305,18 +409,24 @@ class OListProcessor(BlockProcessor): for line in block.split('\n'): m = self.CHILD_RE.match(line) if m: - # This is a new item. Append + # This is a new list item + # Check first item for the start index + if not items and self.TAG == 'ol': + # Detect the integer value of first list item + INTEGER_RE = re.compile(r'(\d+)') + self.STARTSWITH = INTEGER_RE.match(m.group(1)).group() + # Append to the list items.append(m.group(3)) elif self.INDENT_RE.match(line): # This is an indented (possibly nested) item. - if items[-1].startswith(' '*markdown.TAB_LENGTH): + if items[-1].startswith(' '*self.tab_length): # Previous item was indented. Append to that item. - items[-1] = '%s\n%s' % (items[-1], line) + items[-1] = '{}\n{}'.format(items[-1], line) else: items.append(line) else: # This is another line of previous item. Append to that item. - items[-1] = '%s\n%s' % (items[-1], line) + items[-1] = '{}\n{}'.format(items[-1], line) return items @@ -324,14 +434,18 @@ class UListProcessor(OListProcessor): """ Process unordered list blocks. """ TAG = 'ul' - RE = re.compile(r'^[ ]{0,3}[*+-][ ]+(.*)') + + def __init__(self, parser): + super().__init__(parser) + # Detect an item (``1. item``). ``group(1)`` contains contents of item. + self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % (self.tab_length - 1)) class HashHeaderProcessor(BlockProcessor): """ Process Hash Headers. """ # Detect a header at start of any line in block - RE = re.compile(r'(^|\n)(?P<level>#{1,6})(?P<header>.*?)#*(\n|$)') + RE = re.compile(r'(?:^|\n)(?P<level>#{1,6})(?P<header>(?:\\.|[^\\])*?)#*(?:\n|$)') def test(self, parent, block): return bool(self.RE.search(block)) @@ -340,29 +454,29 @@ class HashHeaderProcessor(BlockProcessor): block = blocks.pop(0) m = self.RE.search(block) if m: - before = block[:m.start()] # All lines before header - after = block[m.end():] # All lines after header + before = block[:m.start()] # All lines before header + after = block[m.end():] # All lines after header if before: # As the header was not the first line of the block and the # lines before the header must be parsed first, # recursively parse this lines as a block. self.parser.parseBlocks(parent, [before]) # Create header using named groups from RE - h = markdown.etree.SubElement(parent, 'h%d' % len(m.group('level'))) + h = etree.SubElement(parent, 'h%d' % len(m.group('level'))) h.text = m.group('header').strip() if after: # Insert remaining lines as first block for future parsing. blocks.insert(0, after) - else: + else: # pragma: no cover # This should never happen, but just in case... - message(CRITICAL, "We've got a problem header!") + logger.warn("We've got a problem header: %r" % block) class SetextHeaderProcessor(BlockProcessor): """ Process Setext-style Headers. """ # Detect Setext-style header. Must be first 2 lines of block. - RE = re.compile(r'^.*?\n[=-]{3,}', re.MULTILINE) + RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE) def test(self, parent, block): return bool(self.RE.match(block)) @@ -374,7 +488,7 @@ class SetextHeaderProcessor(BlockProcessor): level = 1 else: level = 2 - h = markdown.etree.SubElement(parent, 'h%d' % level) + h = etree.SubElement(parent, 'h%d' % level) h.text = lines[0].strip() if len(lines) > 2: # Block contains additional lines. Add to master blocks for later. @@ -384,58 +498,91 @@ class SetextHeaderProcessor(BlockProcessor): class HRProcessor(BlockProcessor): """ Process Horizontal Rules. """ - RE = r'[ ]{0,3}(?P<ch>[*_-])[ ]?((?P=ch)[ ]?){2,}[ ]*' + # Python's re module doesn't officially support atomic grouping. However you can fake it. + # See https://stackoverflow.com/a/13577411/866026 + RE = r'^[ ]{0,3}(?=(?P<atomicgroup>(-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,}))(?P=atomicgroup)[ ]*$' # Detect hr on any line of a block. - SEARCH_RE = re.compile(r'(^|\n)%s(\n|$)' % RE) - # Match a hr on a single line of text. - MATCH_RE = re.compile(r'^%s$' % RE) + SEARCH_RE = re.compile(RE, re.MULTILINE) def test(self, parent, block): - return bool(self.SEARCH_RE.search(block)) + m = self.SEARCH_RE.search(block) + if m: + # Save match object on class instance so we can use it later. + self.match = m + return True + return False def run(self, parent, blocks): - lines = blocks.pop(0).split('\n') - prelines = [] + block = blocks.pop(0) + match = self.match # Check for lines in block before hr. - for line in lines: - m = self.MATCH_RE.match(line) - if m: - break - else: - prelines.append(line) - if len(prelines): + prelines = block[:match.start()].rstrip('\n') + if prelines: # Recursively parse lines before hr so they get parsed first. - self.parser.parseBlocks(parent, ['\n'.join(prelines)]) + self.parser.parseBlocks(parent, [prelines]) # create hr - hr = markdown.etree.SubElement(parent, 'hr') + etree.SubElement(parent, 'hr') # check for lines in block after hr. - lines = lines[len(prelines)+1:] - if len(lines): + postlines = block[match.end():].lstrip('\n') + if postlines: # Add lines after hr to master blocks for later parsing. - blocks.insert(0, '\n'.join(lines)) + blocks.insert(0, postlines) class EmptyBlockProcessor(BlockProcessor): - """ Process blocks and start with an empty line. """ + """ Process blocks that are empty or start with an empty line. """ - # Detect a block that only contains whitespace - # or only whitespace on the first line. - RE = re.compile(r'^\s*\n') + def test(self, parent, block): + return not block or block.startswith('\n') + + def run(self, parent, blocks): + block = blocks.pop(0) + filler = '\n\n' + if block: + # Starts with empty line + # Only replace a single line. + filler = '\n' + # Save the rest for later. + theRest = block[1:] + if theRest: + # Add remaining lines to master blocks for later. + blocks.insert(0, theRest) + sibling = self.lastChild(parent) + if (sibling is not None and sibling.tag == 'pre' and + len(sibling) and sibling[0].tag == 'code'): + # Last block is a codeblock. Append to preserve whitespace. + sibling[0].text = util.AtomicString( + '{}{}'.format(sibling[0].text, filler) + ) + + +class ReferenceProcessor(BlockProcessor): + """ Process link references. """ + RE = re.compile( + r'^[ ]{0,3}\[([^\[\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE + ) def test(self, parent, block): - return bool(self.RE.match(block)) + return True def run(self, parent, blocks): block = blocks.pop(0) - m = self.RE.match(block) + m = self.RE.search(block) if m: - # Add remaining line to master blocks for later. - blocks.insert(0, block[m.end():]) - sibling = self.lastChild(parent) - if sibling and sibling.tag == 'pre' and sibling[0] and \ - sibling[0].tag == 'code': - # Last block is a codeblock. Append to preserve whitespace. - sibling[0].text = markdown.AtomicString('%s/n/n/n' % sibling[0].text ) + id = m.group(1).strip().lower() + link = m.group(2).lstrip('<').rstrip('>') + title = m.group(5) or m.group(6) + self.parser.md.references[id] = (link, title) + if block[m.end():].strip(): + # Add any content after match back to blocks as separate block + blocks.insert(0, block[m.end():].lstrip('\n')) + if block[:m.start()].strip(): + # Add any content before match back to blocks as separate block + blocks.insert(0, block[:m.start()].rstrip('\n')) + return True + # No match. Restore block. + blocks.insert(0, block) + return False class ParagraphProcessor(BlockProcessor): @@ -449,12 +596,28 @@ class ParagraphProcessor(BlockProcessor): if block.strip(): # Not a blank block. Add to parent, otherwise throw it away. if self.parser.state.isstate('list'): - # The parent is a tight-list. Append to parent.text - if parent.text: - parent.text = '%s\n%s' % (parent.text, block) + # The parent is a tight-list. + # + # Check for any children. This will likely only happen in a + # tight-list when a header isn't followed by a blank line. + # For example: + # + # * # Header + # Line 2 of list item - not part of header. + sibling = self.lastChild(parent) + if sibling is not None: + # Insetrt after sibling. + if sibling.tail: + sibling.tail = '{}\n{}'.format(sibling.tail, block) + else: + sibling.tail = '\n%s' % block else: - parent.text = block.lstrip() + # Append to parent.text + if parent.text: + parent.text = '{}\n{}'.format(parent.text, block) + else: + parent.text = block.lstrip() else: # Create a regular paragraph - p = markdown.etree.SubElement(parent, 'p') + p = etree.SubElement(parent, 'p') p.text = block.lstrip() |