summary | refs | log | tree | commit | diff
path: root/mako/ext/extract.py
diff options
context:
space:
mode:
Diffstat (limited to 'mako/ext/extract.py')
-rw-r--r-- mako/ext/extract.py 129
1 files changed, 129 insertions, 0 deletions
diff --git a/mako/ext/extract.py b/mako/ext/extract.py
new file mode 100644
index 0000000..fa7fffa
--- /dev/null
+++ b/mako/ext/extract.py
@@ -0,0 +1,129 @@
+# ext/extract.py
+# Copyright 2006-2023 the Mako authors and contributors <see AUTHORS file>
+#
+# This module is part of Mako and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+from io import BytesIO
+from io import StringIO
+import re
+
+from mako import lexer
+from mako import parsetree
+
+
class MessageExtractor:
    """Walk a parsed Mako template and yield its translatable messages.

    This class uses two members that it does not define itself; they are
    expected to be supplied by a subclass or mixin:

    * ``self.config`` -- a mapping read for the ``"encoding"`` and
      ``"comment-tags"`` keys.
    * ``self.process_python(code, code_lineno, translator_strings)`` --
      called once per chunk of Python code found in the template; it must
      return an iterable of messages, which are yielded unchanged.
    """

    # When true, Python code is handed to ``process_python`` as a bytes
    # stream (``BytesIO``); otherwise as a text stream (``StringIO``).
    use_bytes = True

    def process_file(self, fileobj):
        """Lex the template read from *fileobj* and yield its messages.

        The whole of ``fileobj.read()`` is passed to ``lexer.Lexer`` together
        with ``self.config["encoding"]``; the children of the resulting
        template node are then processed by :meth:`extract_nodes`.
        """
        template_node = lexer.Lexer(
            fileobj.read(), input_encoding=self.config["encoding"]
        ).parse()
        yield from self.extract_nodes(template_node.get_children())

    def extract_nodes(self, nodes):
        """Yield the messages found in *nodes* and, recursively, in the
        child nodes of any tag that carries a body.

        Template comments that start with one of the whitespace-separated
        tags in ``self.config["comment-tags"]`` open a run of translator
        comments.  The collected comment lines are passed to
        ``process_python`` along with the next piece of Python code, as long
        as the last comment line sits on the line directly before that code.
        """
        translator_comments = []
        in_translator_comments = False
        input_encoding = self.config["encoding"] or "ascii"
        # Kept as a tuple so ``str.startswith`` can test every tag in a
        # single call (an empty tuple simply never matches).
        comment_tags = tuple(
            filter(None, re.split(r"\s+", self.config["comment-tags"]))
        )

        for node in nodes:
            child_nodes = None
            if (
                in_translator_comments
                and isinstance(node, parsetree.Text)
                and not node.content.strip()
            ):
                # Ignore whitespace within translator comments
                continue

            if isinstance(node, parsetree.Comment):
                value = node.text.strip()
                # Record the comment exactly once, either because it
                # continues a run already in progress or because it starts
                # one.  Testing all tags at once avoids adding the same
                # comment several times when tags overlap (for example
                # "NOTE" and "NOTE:").
                if in_translator_comments or value.startswith(comment_tags):
                    in_translator_comments = True
                    translator_comments.extend(
                        self._split_comment(node.lineno, value)
                    )
                continue

            # Pick the Python source carried by the node and, for tags with
            # a body, the children to descend into afterwards.
            if isinstance(node, parsetree.DefTag):
                code = node.function_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.BlockTag):
                code = node.body_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.CallTag):
                code = node.code.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.PageTag):
                code = node.body_decl.code
            elif isinstance(node, parsetree.CallNamespaceTag):
                code = node.expression
                child_nodes = node.nodes
            elif isinstance(node, parsetree.ControlLine):
                if node.isend:
                    in_translator_comments = False
                    continue
                code = node.text
            elif isinstance(node, parsetree.Code):
                in_translator_comments = False
                code = node.code.code
            elif isinstance(node, parsetree.Expression):
                code = node.code.code
            else:
                continue

            # Comments don't apply unless they immediately precede the message
            if (
                translator_comments
                and translator_comments[-1][0] < node.lineno - 1
            ):
                translator_comments = []

            translator_strings = [
                comment[1] for comment in translator_comments
            ]

            if isinstance(code, str) and self.use_bytes:
                code = code.encode(input_encoding, "backslashreplace")

            used_translator_comments = False
            # We add extra newline to work around a pybabel bug
            # (see python-babel/babel#274, parse_encoding dies if the first
            # input string of the input is non-ascii)
            # Also, because we added it, we have to subtract one from
            # node.lineno
            if self.use_bytes:
                code = BytesIO(b"\n" + code)
            else:
                code = StringIO("\n" + code)

            # Iterate by hand rather than ``yield from`` because we need to
            # know whether at least one message was produced.
            for message in self.process_python(
                code, node.lineno - 1, translator_strings
            ):
                yield message
                used_translator_comments = True

            # Comments that were attached to a message are consumed.
            if used_translator_comments:
                translator_comments = []
            in_translator_comments = False

            if child_nodes:
                yield from self.extract_nodes(child_nodes)

    @staticmethod
    def _split_comment(lineno, comment):
        """Split a possibly multi-line *comment* that starts at *lineno*.

        Returns a list of ``(line_number, line_text)`` tuples, one per line
        of the comment, numbered consecutively from *lineno*.  An empty
        comment yields an empty list.
        """
        return [
            (lineno + index, line)
            for index, line in enumerate(comment.splitlines())
        ]