1 files changed, 92 insertions, 32 deletions
diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py
index 80227bb..498f7e8 100644
--- a/markdown/postprocessors.py
+++ b/markdown/postprocessors.py
@@ -1,4 +1,23 @@
 """
+Python Markdown
+
+A Python implementation of John Gruber's Markdown.
+
+Documentation: https://python-markdown.github.io/
+GitHub: https://github.com/Python-Markdown/markdown/
+PyPI: https://pypi.org/project/Markdown/
+
+Started by Manfred Stienstra (http://www.dwerg.net/).
+Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+Currently maintained by Waylan Limberg (https://github.com/waylan),
+Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+
+Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
+Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+Copyright 2004 Manfred Stienstra (the original version)
+
+License: BSD (see LICENSE.md for details).
+
 POST-PROCESSORS
 =============================================================================
 
@@ -8,15 +27,20 @@ processing.
 
 """
 
+from collections import OrderedDict
+from . import util
+import re
+
 
-import markdown
+def build_postprocessors(md, **kwargs):
+    """ Build the default postprocessors for Markdown. """
+    postprocessors = util.Registry()
+    postprocessors.register(RawHtmlPostprocessor(md), 'raw_html', 30)
+    postprocessors.register(AndSubstitutePostprocessor(), 'amp_substitute', 20)
+    return postprocessors
 
-class Processor:
-    def __init__(self, markdown_instance=None):
-        if markdown_instance:
-            self.markdown = markdown_instance
 
-class Postprocessor(Processor):
+class Postprocessor(util.Processor):
     """
     Postprocessors are run after the ElementTree it converted back into text.
 
@@ -34,44 +58,80 @@ class Postprocessor(Processor):
         (possibly modified) string.
 
         """
-        pass
+        pass  # pragma: no cover
 
 
 class RawHtmlPostprocessor(Postprocessor):
     """ Restore raw html to the document. """
 
+    BLOCK_LEVEL_REGEX = re.compile(r'^\<\/?([^ >]+)')
+
     def run(self, text):
-        """ Iterate over html stash and restore "safe" html. """
-        for i in range(self.markdown.htmlStash.html_counter):
-            html, safe  = self.markdown.htmlStash.rawHtmlBlocks[i]
-            if self.markdown.safeMode and not safe:
-                if str(self.markdown.safeMode).lower() == 'escape':
-                    html = self.escape(html)
-                elif str(self.markdown.safeMode).lower() == 'remove':
-                    html = ''
+        """ Iterate over html stash and restore html. """
+        replacements = OrderedDict()
+        for i in range(self.md.htmlStash.html_counter):
+            html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i])
+            if self.isblocklevel(html):
+                replacements["<p>{}</p>".format(
+                    self.md.htmlStash.get_placeholder(i))] = html
+            replacements[self.md.htmlStash.get_placeholder(i)] = html
+
+        def substitute_match(m):
+            key = m.group(0)
+
+            if key not in replacements:
+                if key[3:-4] in replacements:
+                    return f'<p>{ replacements[key[3:-4]] }</p>'
                 else:
-                    html = markdown.HTML_REMOVED_TEXT
-            if safe or not self.markdown.safeMode:
-                text = text.replace("<p>%s</p>" % 
-                            (markdown.preprocessors.HTML_PLACEHOLDER % i),
-                            html + "\n")
-            text =  text.replace(markdown.preprocessors.HTML_PLACEHOLDER % i, 
-                                 html)
-        return text
+                    return key
+
+            return replacements[key]
 
-    def escape(self, html):
-        """ Basic html escaping """
-        html = html.replace('&', '&amp;')
-        html = html.replace('<', '&lt;')
-        html = html.replace('>', '&gt;')
-        return html.replace('"', '&quot;')
+        if replacements:
+            base_placeholder = util.HTML_PLACEHOLDER % r'([0-9]+)'
+            pattern = re.compile(f'<p>{ base_placeholder }</p>|{ base_placeholder }')
+            processed_text = pattern.sub(substitute_match, text)
+        else:
+            return text
+
+        if processed_text == text:
+            return processed_text
+        else:
+            return self.run(processed_text)
+
+    def isblocklevel(self, html):
+        m = self.BLOCK_LEVEL_REGEX.match(html)
+        if m:
+            if m.group(1)[0] in ('!', '?', '@', '%'):
+                # Name starts with `!`, `?`, `@` or `%` (comment, PHP, etc.): always treat as block-level.
+                return True
+            return self.md.is_block_level(m.group(1))
+        return False
+
+    def stash_to_string(self, text):
+        """ Convert a stashed object to a string. """
+        return str(text)
 
 
 class AndSubstitutePostprocessor(Postprocessor):
     """ Restore valid entities """
-    def __init__(self):
-        pass
 
     def run(self, text):
-        text =  text.replace(markdown.AMP_SUBSTITUTE, "&")
+        text = text.replace(util.AMP_SUBSTITUTE, "&")
         return text
+
+
+@util.deprecated(
+    "This class will be removed in the future; "
+    "use 'treeprocessors.UnescapeTreeprocessor' instead."
+)
+class UnescapePostprocessor(Postprocessor):
+    """ Restore escaped chars """
+
+    RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))
+
+    def unescape(self, m):
+        return chr(int(m.group(1)))
+
+    def run(self, text):
+        return self.RE.sub(self.unescape, text)