1 files changed, 293 insertions, 130 deletions
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
index 7d3b137..3d0ff86 100644
--- a/markdown/blockprocessors.py
+++ b/markdown/blockprocessors.py
@@ -1,23 +1,64 @@
 """
+Python Markdown
+
+A Python implementation of John Gruber's Markdown.
+
+Documentation: https://python-markdown.github.io/
+GitHub: https://github.com/Python-Markdown/markdown/
+PyPI: https://pypi.org/project/Markdown/
+
+Started by Manfred Stienstra (http://www.dwerg.net/).
+Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+Currently maintained by Waylan Limberg (https://github.com/waylan),
+Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+
+Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
+Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+Copyright 2004 Manfred Stienstra (the original version)
+
+License: BSD (see LICENSE.md for details).
+
 CORE MARKDOWN BLOCKPARSER
-=============================================================================
+===========================================================================
 
-This parser handles basic parsing of Markdown blocks.  It doesn't concern itself
-with inline elements such as **bold** or *italics*, but rather just catches 
-blocks, lists, quotes, etc.
+This parser handles basic parsing of Markdown blocks.  It doesn't concern
+itself with inline elements such as **bold** or *italics*, but rather just
+catches blocks, lists, quotes, etc.
 
-The BlockParser is made up of a bunch of BlockProssors, each handling a 
+The BlockParser is made up of a bunch of BlockProcessors, each handling a
 different type of block. Extensions may add/replace/remove BlockProcessors
 as they need to alter how markdown blocks are parsed.
-
 """
 
+import logging
 import re
-import markdown
+import xml.etree.ElementTree as etree
+from . import util
+from .blockparser import BlockParser
+
+logger = logging.getLogger('MARKDOWN')
+
+
+def build_block_parser(md, **kwargs):
+    """ Build the default block parser used by Markdown. """
+    parser = BlockParser(md)
+    parser.blockprocessors.register(EmptyBlockProcessor(parser), 'empty', 100)
+    parser.blockprocessors.register(ListIndentProcessor(parser), 'indent', 90)
+    parser.blockprocessors.register(CodeBlockProcessor(parser), 'code', 80)
+    parser.blockprocessors.register(HashHeaderProcessor(parser), 'hashheader', 70)
+    parser.blockprocessors.register(SetextHeaderProcessor(parser), 'setextheader', 60)
+    parser.blockprocessors.register(HRProcessor(parser), 'hr', 50)
+    parser.blockprocessors.register(OListProcessor(parser), 'olist', 40)
+    parser.blockprocessors.register(UListProcessor(parser), 'ulist', 30)
+    parser.blockprocessors.register(BlockQuoteProcessor(parser), 'quote', 20)
+    parser.blockprocessors.register(ReferenceProcessor(parser), 'reference', 15)
+    parser.blockprocessors.register(ParagraphProcessor(parser), 'paragraph', 10)
+    return parser
+
 
 class BlockProcessor:
-    """ Base class for block processors. 
-    
+    """ Base class for block processors.
+
     Each subclass will provide the methods below to work with the source and
     tree. Each processor will need to define it's own ``test`` and ``run``
     methods. The ``test`` method should return True or False, to indicate
@@ -26,8 +67,9 @@ class BlockProcessor:
 
     """
 
-    def __init__(self, parser=None):
+    def __init__(self, parser):
         self.parser = parser
+        self.tab_length = parser.md.tab_length
 
     def lastChild(self, parent):
         """ Return the last child of an etree element. """
@@ -36,13 +78,15 @@ class BlockProcessor:
         else:
             return None
 
-    def detab(self, text):
+    def detab(self, text, length=None):
         """ Remove a tab from the front of each line of the given text. """
+        if length is None:
+            length = self.tab_length
         newtext = []
         lines = text.split('\n')
         for line in lines:
-            if line.startswith(' '*markdown.TAB_LENGTH):
-                newtext.append(line[markdown.TAB_LENGTH:])
+            if line.startswith(' ' * length):
+                newtext.append(line[length:])
             elif not line.strip():
                 newtext.append('')
             else:
@@ -53,37 +97,37 @@ class BlockProcessor:
         """ Remove a tab from front of lines but allowing dedented lines. """
         lines = text.split('\n')
         for i in range(len(lines)):
-            if lines[i].startswith(' '*markdown.TAB_LENGTH*level):
-                lines[i] = lines[i][markdown.TAB_LENGTH*level:]
+            if lines[i].startswith(' '*self.tab_length*level):
+                lines[i] = lines[i][self.tab_length*level:]
         return '\n'.join(lines)
 
     def test(self, parent, block):
-        """ Test for block type. Must be overridden by subclasses. 
-        
-        As the parser loops through processors, it will call the ``test`` method
-        on each to determine if the given block of text is of that type. This
-        method must return a boolean ``True`` or ``False``. The actual method of
-        testing is left to the needs of that particular block type. It could 
-        be as simple as ``block.startswith(some_string)`` or a complex regular
-        expression. As the block type may be different depending on the parent
-        of the block (i.e. inside a list), the parent etree element is also 
-        provided and may be used as part of the test.
+        """ Test for block type. Must be overridden by subclasses.
+
+        As the parser loops through processors, it will call the ``test``
+        method on each to determine if the given block of text is of that
+        type. This method must return a boolean ``True`` or ``False``. The
+        actual method of testing is left to the needs of that particular
+        block type. It could be as simple as ``block.startswith(some_string)``
+        or a complex regular expression. As the block type may be different
+        depending on the parent of the block (i.e. inside a list), the parent
+        etree element is also provided and may be used as part of the test.
 
         Keywords:
-        
+
         * ``parent``: A etree element which will be the parent of the block.
-        * ``block``: A block of text from the source which has been split at 
+        * ``block``: A block of text from the source which has been split at
             blank lines.
         """
-        pass
+        pass  # pragma: no cover
 
     def run(self, parent, blocks):
-        """ Run processor. Must be overridden by subclasses. 
-        
+        """ Run processor. Must be overridden by subclasses.
+
         When the parser determines the appropriate type of a block, the parser
         will call the corresponding processor's ``run`` method. This method
         should parse the individual lines of the block and append them to
-        the etree. 
+        the etree.
 
         Note that both the ``parent`` and ``etree`` keywords are pointers
         to instances of the objects which should be edited in place. Each
@@ -99,12 +143,12 @@ class BlockProcessor:
         * ``parent``: A etree element which is the parent of the current block.
         * ``blocks``: A list of all remaining blocks of the document.
         """
-        pass
+        pass  # pragma: no cover
 
 
 class ListIndentProcessor(BlockProcessor):
-    """ Process children of list items. 
-    
+    """ Process children of list items.
+
     Example:
         * a list item
             process this part
@@ -113,18 +157,19 @@ class ListIndentProcessor(BlockProcessor):
 
     """
 
-    INDENT_RE = re.compile(r'^(([ ]{%s})+)'% markdown.TAB_LENGTH)
     ITEM_TYPES = ['li']
     LIST_TYPES = ['ul', 'ol']
 
+    def __init__(self, *args):
+        super().__init__(*args)
+        self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length)
+
     def test(self, parent, block):
-        return block.startswith(' '*markdown.TAB_LENGTH) and \
-                not self.parser.state.isstate('detabbed') and  \
-                (parent.tag in self.ITEM_TYPES or \
-                    (len(parent) and parent[-1] and \
-                        (parent[-1].tag in self.LIST_TYPES)
-                    )
-                )
+        return block.startswith(' '*self.tab_length) and \
+            not self.parser.state.isstate('detabbed') and \
+            (parent.tag in self.ITEM_TYPES or
+                (len(parent) and parent[-1] is not None and
+                    (parent[-1].tag in self.LIST_TYPES)))
 
     def run(self, parent, blocks):
         block = blocks.pop(0)
@@ -133,8 +178,16 @@ class ListIndentProcessor(BlockProcessor):
 
         self.parser.state.set('detabbed')
         if parent.tag in self.ITEM_TYPES:
-            # The parent is already a li. Just parse the child block.
-            self.parser.parseBlocks(parent, [block])
+            # It's possible that this parent has a 'ul' or 'ol' child list
+            # with a member.  If that is the case, then that should be the
+            # parent.  This is intended to catch the edge case of an indented
+            # list whose first member was parsed previous to this point
+            # see OListProcessor
+            if len(parent) and parent[-1].tag in self.LIST_TYPES:
+                self.parser.parseBlocks(parent[-1], [block])
+            else:
+                # The parent is already a li. Just parse the child block.
+                self.parser.parseBlocks(parent, [block])
         elif sibling.tag in self.ITEM_TYPES:
             # The sibling is a li. Use it as parent.
             self.parser.parseBlocks(sibling, [block])
@@ -143,8 +196,12 @@ class ListIndentProcessor(BlockProcessor):
             # Assume the last child li is the parent of this block.
             if sibling[-1].text:
                 # If the parent li has text, that text needs to be moved to a p
-                block = '%s\n\n%s' % (sibling[-1].text, block)
+                # The p must be 'inserted' at beginning of list in the event
+                # that other children already exist, i.e., a nested sublist.
+                p = etree.Element('p')
+                p.text = sibling[-1].text
                 sibling[-1].text = ''
+                sibling[-1].insert(0, p)
             self.parser.parseChunk(sibling[-1], block)
         else:
             self.create_item(sibling, block)
@@ -152,15 +209,15 @@ class ListIndentProcessor(BlockProcessor):
 
     def create_item(self, parent, block):
         """ Create a new li and parse the block with it as the parent. """
-        li = markdown.etree.SubElement(parent, 'li')
+        li = etree.SubElement(parent, 'li')
         self.parser.parseBlocks(li, [block])
- 
+
     def get_level(self, parent, block):
         """ Get level of indent based on list level. """
         # Get indent level
         m = self.INDENT_RE.match(block)
         if m:
-            indent_level = len(m.group(1))/markdown.TAB_LENGTH
+            indent_level = len(m.group(1))/self.tab_length
         else:
             indent_level = 0
         if self.parser.state.isstate('list'):
@@ -172,7 +229,8 @@ class ListIndentProcessor(BlockProcessor):
         # Step through children of tree to find matching indent level.
         while indent_level > level:
             child = self.lastChild(parent)
-            if child and (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES):
+            if (child is not None and
+               (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES)):
                 if child.tag in self.LIST_TYPES:
                     level += 1
                 parent = child
@@ -187,28 +245,30 @@ class CodeBlockProcessor(BlockProcessor):
     """ Process code blocks. """
 
     def test(self, parent, block):
-        return block.startswith(' '*markdown.TAB_LENGTH)
-    
+        return block.startswith(' '*self.tab_length)
+
     def run(self, parent, blocks):
         sibling = self.lastChild(parent)
         block = blocks.pop(0)
         theRest = ''
-        if sibling and sibling.tag == "pre" and len(sibling) \
-                    and sibling[0].tag == "code":
+        if (sibling is not None and sibling.tag == "pre" and
+           len(sibling) and sibling[0].tag == "code"):
             # The previous block was a code block. As blank lines do not start
             # new code blocks, append this block to the previous, adding back
             # linebreaks removed from the split into a list.
             code = sibling[0]
             block, theRest = self.detab(block)
-            code.text = markdown.AtomicString('%s\n%s\n' % (code.text, block.rstrip()))
+            code.text = util.AtomicString(
+                '{}\n{}\n'.format(code.text, util.code_escape(block.rstrip()))
+            )
         else:
             # This is a new codeblock. Create the elements and insert text.
-            pre = markdown.etree.SubElement(parent, 'pre')
-            code = markdown.etree.SubElement(pre, 'code')
+            pre = etree.SubElement(parent, 'pre')
+            code = etree.SubElement(pre, 'code')
             block, theRest = self.detab(block)
-            code.text = markdown.AtomicString('%s\n' % block.rstrip())
+            code.text = util.AtomicString('%s\n' % util.code_escape(block.rstrip()))
         if theRest:
-            # This block contained unindented line(s) after the first indented 
+            # This block contained unindented line(s) after the first indented
             # line. Insert these lines as the first block of the master blocks
             # list for future processing.
             blocks.insert(0, theRest)
@@ -219,27 +279,31 @@ class BlockQuoteProcessor(BlockProcessor):
     RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')
 
     def test(self, parent, block):
-        return bool(self.RE.search(block))
+        return bool(self.RE.search(block)) and not util.nearing_recursion_limit()
 
     def run(self, parent, blocks):
         block = blocks.pop(0)
         m = self.RE.search(block)
         if m:
-            before = block[:m.start()] # Lines before blockquote
-            # Pass lines before blockquote in recursively for parsing forst.
+            before = block[:m.start()]  # Lines before blockquote
+            # Pass lines before blockquote in recursively for parsing first.
             self.parser.parseBlocks(parent, [before])
-            # Remove ``> `` from begining of each line.
-            block = '\n'.join([self.clean(line) for line in 
-                            block[m.start():].split('\n')])
+            # Remove ``> `` from beginning of each line.
+            block = '\n'.join(
+                [self.clean(line) for line in block[m.start():].split('\n')]
+            )
         sibling = self.lastChild(parent)
-        if sibling and sibling.tag == "blockquote":
+        if sibling is not None and sibling.tag == "blockquote":
             # Previous block was a blockquote so set that as this blocks parent
             quote = sibling
         else:
             # This is a new blockquote. Create a new parent element.
-            quote = markdown.etree.SubElement(parent, 'blockquote')
+            quote = etree.SubElement(parent, 'blockquote')
         # Recursively parse block with blockquote as parent.
+        # change parser state so blockquotes embedded in lists use p tags
+        self.parser.state.set('blockquote')
         self.parser.parseChunk(quote, block)
+        self.parser.state.reset()
 
     def clean(self, line):
         """ Remove ``>`` from beginning of a line. """
@@ -251,16 +315,31 @@ class BlockQuoteProcessor(BlockProcessor):
         else:
             return line
 
+
 class OListProcessor(BlockProcessor):
     """ Process ordered list blocks. """
 
     TAG = 'ol'
-    # Detect an item (``1. item``). ``group(1)`` contains contents of item.
-    RE = re.compile(r'^[ ]{0,3}\d+\.[ ]+(.*)')
-    # Detect items on secondary lines. they can be of either list type.
-    CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ]+(.*)')
-    # Detect indented (nested) items of either type
-    INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ]+.*')
+    # The integer (as a Python string) with which the list starts (default=1).
+    # E.g., if the list begins with
+    #   3. Item
+    # the ol tag will get a start="3" attribute (only when LAZY_OL is False).
+    STARTSWITH = '1'
+    # Lazy ol - ignore startswith
+    LAZY_OL = True
+    # List of allowed sibling tags.
+    SIBLING_TAGS = ['ol', 'ul']
+
+    def __init__(self, parser):
+        super().__init__(parser)
+        # Detect an item (``1. item``). ``group(1)`` contains contents of item.
+        self.RE = re.compile(r'^[ ]{0,%d}\d+\.[ ]+(.*)' % (self.tab_length - 1))
+        # Detect items on secondary lines. they can be of either list type.
+        self.CHILD_RE = re.compile(r'^[ ]{0,%d}((\d+\.)|[*+-])[ ]+(.*)' %
+                                   (self.tab_length - 1))
+        # Detect indented (nested) items of either type
+        self.INDENT_RE = re.compile(r'^[ ]{%d,%d}((\d+\.)|[*+-])[ ]+.*' %
+                                    (self.tab_length, self.tab_length * 2 - 1))
 
     def test(self, parent, block):
         return bool(self.RE.match(block))
@@ -269,33 +348,58 @@ class OListProcessor(BlockProcessor):
         # Check fr multiple items in one block.
         items = self.get_items(blocks.pop(0))
         sibling = self.lastChild(parent)
-        if sibling and sibling.tag in ['ol', 'ul']:
+
+        if sibling is not None and sibling.tag in self.SIBLING_TAGS:
             # Previous block was a list item, so set that as parent
             lst = sibling
-            # make sure previous item is in a p.
-            if len(lst) and lst[-1].text and not len(lst[-1]):
-                p = markdown.etree.SubElement(lst[-1], 'p')
+            # make sure previous item is in a p- if the item has text,
+            # then it isn't in a p
+            if lst[-1].text:
+                # since it's possible there are other children for this
+                # sibling, we can't just SubElement the p, we need to
+                # insert it as the first item.
+                p = etree.Element('p')
                 p.text = lst[-1].text
                 lst[-1].text = ''
+                lst[-1].insert(0, p)
+            # if the last item has a tail, then the tail needs to be put in a p
+            # likely only when a header is not followed by a blank line
+            lch = self.lastChild(lst[-1])
+            if lch is not None and lch.tail:
+                p = etree.SubElement(lst[-1], 'p')
+                p.text = lch.tail.lstrip()
+                lch.tail = ''
+
             # parse first block differently as it gets wrapped in a p.
-            li = markdown.etree.SubElement(lst, 'li')
+            li = etree.SubElement(lst, 'li')
             self.parser.state.set('looselist')
             firstitem = items.pop(0)
             self.parser.parseBlocks(li, [firstitem])
             self.parser.state.reset()
+        elif parent.tag in ['ol', 'ul']:
+            # this catches the edge case of a multi-item indented list whose
+            # first item is in a blank parent-list item:
+            # * * subitem1
+            #     * subitem2
+            # see also ListIndentProcessor
+            lst = parent
         else:
             # This is a new list so create parent with appropriate tag.
-            lst = markdown.etree.SubElement(parent, self.TAG)
+            lst = etree.SubElement(parent, self.TAG)
+            # Check if a custom start integer is set
+            if not self.LAZY_OL and self.STARTSWITH != '1':
+                lst.attrib['start'] = self.STARTSWITH
+
         self.parser.state.set('list')
         # Loop through items in block, recursively parsing each with the
         # appropriate parent.
         for item in items:
-            if item.startswith(' '*markdown.TAB_LENGTH):
+            if item.startswith(' '*self.tab_length):
                 # Item is indented. Parse with last item as parent
                 self.parser.parseBlocks(lst[-1], [item])
             else:
                 # New item. Create li and parse with it as parent
-                li = markdown.etree.SubElement(lst, 'li')
+                li = etree.SubElement(lst, 'li')
                 self.parser.parseBlocks(li, [item])
         self.parser.state.reset()
 
@@ -305,18 +409,24 @@ class OListProcessor(BlockProcessor):
         for line in block.split('\n'):
             m = self.CHILD_RE.match(line)
             if m:
-                # This is a new item. Append
+                # This is a new list item
+                # Check first item for the start index
+                if not items and self.TAG == 'ol':
+                    # Detect the integer value of first list item
+                    INTEGER_RE = re.compile(r'(\d+)')
+                    self.STARTSWITH = INTEGER_RE.match(m.group(1)).group()
+                # Append to the list
                 items.append(m.group(3))
             elif self.INDENT_RE.match(line):
                 # This is an indented (possibly nested) item.
-                if items[-1].startswith(' '*markdown.TAB_LENGTH):
+                if items[-1].startswith(' '*self.tab_length):
                     # Previous item was indented. Append to that item.
-                    items[-1] = '%s\n%s' % (items[-1], line)
+                    items[-1] = '{}\n{}'.format(items[-1], line)
                 else:
                     items.append(line)
             else:
                 # This is another line of previous item. Append to that item.
-                items[-1] = '%s\n%s' % (items[-1], line)
+                items[-1] = '{}\n{}'.format(items[-1], line)
         return items
 
 
@@ -324,14 +434,18 @@ class UListProcessor(OListProcessor):
     """ Process unordered list blocks. """
 
     TAG = 'ul'
-    RE = re.compile(r'^[ ]{0,3}[*+-][ ]+(.*)')
+
+    def __init__(self, parser):
+        super().__init__(parser)
+        # Detect an item (``* item``). ``group(1)`` contains contents of item.
+        self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % (self.tab_length - 1))
 
 
 class HashHeaderProcessor(BlockProcessor):
     """ Process Hash Headers. """
 
     # Detect a header at start of any line in block
-    RE = re.compile(r'(^|\n)(?P<level>#{1,6})(?P<header>.*?)#*(\n|$)')
+    RE = re.compile(r'(?:^|\n)(?P<level>#{1,6})(?P<header>(?:\\.|[^\\])*?)#*(?:\n|$)')
 
     def test(self, parent, block):
         return bool(self.RE.search(block))
@@ -340,29 +454,29 @@ class HashHeaderProcessor(BlockProcessor):
         block = blocks.pop(0)
         m = self.RE.search(block)
         if m:
-            before = block[:m.start()] # All lines before header
-            after = block[m.end():]    # All lines after header
+            before = block[:m.start()]  # All lines before header
+            after = block[m.end():]     # All lines after header
             if before:
                 # As the header was not the first line of the block and the
                 # lines before the header must be parsed first,
                 # recursively parse this lines as a block.
                 self.parser.parseBlocks(parent, [before])
             # Create header using named groups from RE
-            h = markdown.etree.SubElement(parent, 'h%d' % len(m.group('level')))
+            h = etree.SubElement(parent, 'h%d' % len(m.group('level')))
             h.text = m.group('header').strip()
             if after:
                 # Insert remaining lines as first block for future parsing.
                 blocks.insert(0, after)
-        else:
+        else:  # pragma: no cover
             # This should never happen, but just in case...
-            message(CRITICAL, "We've got a problem header!")
+            logger.warning("We've got a problem header: %r", block)
 
 
 class SetextHeaderProcessor(BlockProcessor):
     """ Process Setext-style Headers. """
 
     # Detect Setext-style header. Must be first 2 lines of block.
-    RE = re.compile(r'^.*?\n[=-]{3,}', re.MULTILINE)
+    RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE)
 
     def test(self, parent, block):
         return bool(self.RE.match(block))
@@ -374,7 +488,7 @@ class SetextHeaderProcessor(BlockProcessor):
             level = 1
         else:
             level = 2
-        h = markdown.etree.SubElement(parent, 'h%d' % level)
+        h = etree.SubElement(parent, 'h%d' % level)
         h.text = lines[0].strip()
         if len(lines) > 2:
             # Block contains additional lines. Add to  master blocks for later.
@@ -384,58 +498,91 @@ class SetextHeaderProcessor(BlockProcessor):
 class HRProcessor(BlockProcessor):
     """ Process Horizontal Rules. """
 
-    RE = r'[ ]{0,3}(?P<ch>[*_-])[ ]?((?P=ch)[ ]?){2,}[ ]*'
+    # Python's re module doesn't officially support atomic grouping. However you can fake it.
+    # See https://stackoverflow.com/a/13577411/866026
+    RE = r'^[ ]{0,3}(?=(?P<atomicgroup>(-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,}))(?P=atomicgroup)[ ]*$'
     # Detect hr on any line of a block.
-    SEARCH_RE = re.compile(r'(^|\n)%s(\n|$)' % RE)
-    # Match a hr on a single line of text.
-    MATCH_RE = re.compile(r'^%s$' % RE)
+    SEARCH_RE = re.compile(RE, re.MULTILINE)
 
     def test(self, parent, block):
-        return bool(self.SEARCH_RE.search(block))
+        m = self.SEARCH_RE.search(block)
+        if m:
+            # Save match object on class instance so we can use it later.
+            self.match = m
+            return True
+        return False
 
     def run(self, parent, blocks):
-        lines = blocks.pop(0).split('\n')
-        prelines = []
+        block = blocks.pop(0)
+        match = self.match
         # Check for lines in block before hr.
-        for line in lines:
-            m = self.MATCH_RE.match(line)
-            if m:
-                break
-            else:
-                prelines.append(line)
-        if len(prelines):
+        prelines = block[:match.start()].rstrip('\n')
+        if prelines:
             # Recursively parse lines before hr so they get parsed first.
-            self.parser.parseBlocks(parent, ['\n'.join(prelines)])
+            self.parser.parseBlocks(parent, [prelines])
         # create hr
-        hr = markdown.etree.SubElement(parent, 'hr')
+        etree.SubElement(parent, 'hr')
         # check for lines in block after hr.
-        lines = lines[len(prelines)+1:]
-        if len(lines):
+        postlines = block[match.end():].lstrip('\n')
+        if postlines:
             # Add lines after hr to master blocks for later parsing.
-            blocks.insert(0, '\n'.join(lines))
+            blocks.insert(0, postlines)
 
 
 class EmptyBlockProcessor(BlockProcessor):
-    """ Process blocks and start with an empty line. """
+    """ Process blocks that are empty or start with an empty line. """
 
-    # Detect a block that only contains whitespace 
-    # or only whitespace on the first line.
-    RE = re.compile(r'^\s*\n')
+    def test(self, parent, block):
+        return not block or block.startswith('\n')
+
+    def run(self, parent, blocks):
+        block = blocks.pop(0)
+        filler = '\n\n'
+        if block:
+            # Starts with empty line
+            # Only replace a single line.
+            filler = '\n'
+            # Save the rest for later.
+            theRest = block[1:]
+            if theRest:
+                # Add remaining lines to master blocks for later.
+                blocks.insert(0, theRest)
+        sibling = self.lastChild(parent)
+        if (sibling is not None and sibling.tag == 'pre' and
+           len(sibling) and sibling[0].tag == 'code'):
+            # Last block is a codeblock. Append to preserve whitespace.
+            sibling[0].text = util.AtomicString(
+                '{}{}'.format(sibling[0].text, filler)
+            )
+
+
+class ReferenceProcessor(BlockProcessor):
+    """ Process link references. """
+    RE = re.compile(
+        r'^[ ]{0,3}\[([^\[\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE
+    )
 
     def test(self, parent, block):
-        return bool(self.RE.match(block))
+        return True
 
     def run(self, parent, blocks):
         block = blocks.pop(0)
-        m = self.RE.match(block)
+        m = self.RE.search(block)
         if m:
-            # Add remaining line to master blocks for later.
-            blocks.insert(0, block[m.end():])
-            sibling = self.lastChild(parent)
-            if sibling and sibling.tag == 'pre' and sibling[0] and \
-                    sibling[0].tag == 'code':
-                # Last block is a codeblock. Append to preserve whitespace.
-                sibling[0].text = markdown.AtomicString('%s/n/n/n' % sibling[0].text )
+            id = m.group(1).strip().lower()
+            link = m.group(2).lstrip('<').rstrip('>')
+            title = m.group(5) or m.group(6)
+            self.parser.md.references[id] = (link, title)
+            if block[m.end():].strip():
+                # Add any content after match back to blocks as separate block
+                blocks.insert(0, block[m.end():].lstrip('\n'))
+            if block[:m.start()].strip():
+                # Add any content before match back to blocks as separate block
+                blocks.insert(0, block[:m.start()].rstrip('\n'))
+            return True
+        # No match. Restore block.
+        blocks.insert(0, block)
+        return False
 
 
 class ParagraphProcessor(BlockProcessor):
@@ -449,12 +596,28 @@ class ParagraphProcessor(BlockProcessor):
         if block.strip():
             # Not a blank block. Add to parent, otherwise throw it away.
             if self.parser.state.isstate('list'):
-                # The parent is a tight-list. Append to parent.text
-                if parent.text:
-                    parent.text = '%s\n%s' % (parent.text, block)
+                # The parent is a tight-list.
+                #
+                # Check for any children. This will likely only happen in a
+                # tight-list when a header isn't followed by a blank line.
+                # For example:
+                #
+                #     * # Header
+                #     Line 2 of list item - not part of header.
+                sibling = self.lastChild(parent)
+                if sibling is not None:
+                    # Insert after sibling.
+                    if sibling.tail:
+                        sibling.tail = '{}\n{}'.format(sibling.tail, block)
+                    else:
+                        sibling.tail = '\n%s' % block
                 else:
-                    parent.text = block.lstrip()
+                    # Append to parent.text
+                    if parent.text:
+                        parent.text = '{}\n{}'.format(parent.text, block)
+                    else:
+                        parent.text = block.lstrip()
             else:
                 # Create a regular paragraph
-                p = markdown.etree.SubElement(parent, 'p')
+                p = etree.SubElement(parent, 'p')
                 p.text = block.lstrip()