diff options
Diffstat (limited to 'mako/filters.py')
-rw-r--r-- | mako/filters.py | 163 |
1 files changed, 163 insertions, 0 deletions
diff --git a/mako/filters.py b/mako/filters.py new file mode 100644 index 0000000..b255aaf --- /dev/null +++ b/mako/filters.py @@ -0,0 +1,163 @@ +# mako/filters.py +# Copyright 2006-2023 the Mako authors and contributors <see AUTHORS file> +# +# This module is part of Mako and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + + +import codecs +from html.entities import codepoint2name +from html.entities import name2codepoint +import re +from urllib.parse import quote_plus + +import markupsafe + +html_escape = markupsafe.escape + +xml_escapes = { + "&": "&", + ">": ">", + "<": "<", + '"': """, # also " in html-only + "'": "'", # also ' in html-only +} + + +def xml_escape(string): + return re.sub(r'([&<"\'>])', lambda m: xml_escapes[m.group()], string) + + +def url_escape(string): + # convert into a list of octets + string = string.encode("utf8") + return quote_plus(string) + + +def trim(string): + return string.strip() + + +class Decode: + def __getattr__(self, key): + def decode(x): + if isinstance(x, str): + return x + elif not isinstance(x, bytes): + return decode(str(x)) + else: + return str(x, encoding=key) + + return decode + + +decode = Decode() + + +class XMLEntityEscaper: + def __init__(self, codepoint2name, name2codepoint): + self.codepoint2entity = { + c: str("&%s;" % n) for c, n in codepoint2name.items() + } + self.name2codepoint = name2codepoint + + def escape_entities(self, text): + """Replace characters with their character entity references. + + Only characters corresponding to a named entity are replaced. + """ + return str(text).translate(self.codepoint2entity) + + def __escape(self, m): + codepoint = ord(m.group()) + try: + return self.codepoint2entity[codepoint] + except (KeyError, IndexError): + return "&#x%X;" % codepoint + + __escapable = re.compile(r'["&<>]|[^\x00-\x7f]') + + def escape(self, text): + """Replace characters with their character references. + + Replace characters by their named entity references. + Non-ASCII characters, if they do not have a named entity reference, + are replaced by numerical character references. + + The return value is guaranteed to be ASCII. + """ + return self.__escapable.sub(self.__escape, str(text)).encode("ascii") + + # XXX: This regexp will not match all valid XML entity names__. + # (It punts on details involving involving CombiningChars and Extenders.) + # + # .. __: http://www.w3.org/TR/2000/REC-xml-20001006#NT-EntityRef + __characterrefs = re.compile( + r"""& (?: + \#(\d+) + | \#x([\da-f]+) + | ( (?!\d) [:\w] [-.:\w]+ ) + ) ;""", + re.X | re.UNICODE, + ) + + def __unescape(self, m): + dval, hval, name = m.groups() + if dval: + codepoint = int(dval) + elif hval: + codepoint = int(hval, 16) + else: + codepoint = self.name2codepoint.get(name, 0xFFFD) + # U+FFFD = "REPLACEMENT CHARACTER" + if codepoint < 128: + return chr(codepoint) + return chr(codepoint) + + def unescape(self, text): + """Unescape character references. + + All character references (both entity references and numerical + character references) are unescaped. + """ + return self.__characterrefs.sub(self.__unescape, text) + + +_html_entities_escaper = XMLEntityEscaper(codepoint2name, name2codepoint) + +html_entities_escape = _html_entities_escaper.escape_entities +html_entities_unescape = _html_entities_escaper.unescape + + +def htmlentityreplace_errors(ex): + """An encoding error handler. + + This python codecs error handler replaces unencodable + characters with HTML entities, or, if no HTML entity exists for + the character, XML character references:: + + >>> 'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace') + 'The cost was €12.' + """ + if isinstance(ex, UnicodeEncodeError): + # Handle encoding errors + bad_text = ex.object[ex.start : ex.end] + text = _html_entities_escaper.escape(bad_text) + return (str(text), ex.end) + raise ex + + +codecs.register_error("htmlentityreplace", htmlentityreplace_errors) + + +DEFAULT_ESCAPES = { + "x": "filters.xml_escape", + "h": "filters.html_escape", + "u": "filters.url_escape", + "trim": "filters.trim", + "entity": "filters.html_entities_escape", + "unicode": "str", + "decode": "decode", + "str": "str", + "n": "n", +} |