1 files changed, 153 insertions, 0 deletions
diff --git a/mako/ext/extract.py b/mako/ext/extract.py
new file mode 100644
index 0000000..fa7fffa
--- /dev/null
+++ b/mako/ext/extract.py
@@ -0,0 +1,153 @@
+# ext/extract.py
+# Copyright 2006-2023 the Mako authors and contributors <see AUTHORS file>
+#
+# This module is part of Mako and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+from io import BytesIO
+from io import StringIO
+import re
+
+from mako import lexer
+from mako import parsetree
+
+
+class MessageExtractor:
+    """Walk a parsed Mako template and yield the messages found in it.
+
+    The class reads ``self.config`` (its ``"encoding"`` and
+    ``"comment-tags"`` entries) and calls ``self.process_python``; neither
+    is defined here, so subclasses are presumably expected to supply them.
+    """
+
+    # When true, code is handed to ``process_python`` as a ``BytesIO`` of
+    # encoded bytes; otherwise it is handed over as a ``StringIO``.
+    use_bytes = True
+
+    def process_file(self, fileobj):
+        """Parse the template read from ``fileobj`` and yield its messages.
+
+        The whole of ``fileobj`` is read, lexed with the configured
+        ``"encoding"``, and the top-level nodes of the resulting tree are
+        passed to :meth:`extract_nodes`.
+        """
+        template_node = lexer.Lexer(
+            fileobj.read(), input_encoding=self.config["encoding"]
+        ).parse()
+        yield from self.extract_nodes(template_node.get_children())
+
+    def extract_nodes(self, nodes):
+        """Yield every message ``process_python`` finds in ``nodes``.
+
+        Nodes carrying Python code are passed to ``self.process_python``
+        together with the translator comments that immediately precede
+        them; tags that have child nodes are then descended into
+        recursively.
+
+        A template comment starting with one of the whitespace-separated
+        ``"comment-tags"`` opens a run of translator comments. Later
+        comments extend the run until a code-bearing node or an end
+        control line closes it; whitespace-only text inside it is skipped.
+        """
+        translator_comments = []
+        in_translator_comments = False
+        input_encoding = self.config["encoding"] or "ascii"
+        # A tuple, so that ``str.startswith`` can test every tag in one call
+        comment_tags = tuple(
+            filter(None, re.split(r"\s+", self.config["comment-tags"]))
+        )
+
+        for node in nodes:
+            child_nodes = None
+            if (
+                in_translator_comments
+                and isinstance(node, parsetree.Text)
+                and not node.content.strip()
+            ):
+                # Ignore whitespace within translator comments
+                continue
+
+            if isinstance(node, parsetree.Comment):
+                value = node.text.strip()
+                # Record the comment once, even when several configured
+                # tags match it (e.g. "TRANSLATOR" and "TRANSLATORS:")
+                if in_translator_comments or value.startswith(comment_tags):
+                    in_translator_comments = True
+                    translator_comments.extend(
+                        self._split_comment(node.lineno, value)
+                    )
+                continue
+
+            if isinstance(node, parsetree.DefTag):
+                code = node.function_decl.code
+                child_nodes = node.nodes
+            elif isinstance(node, parsetree.BlockTag):
+                code = node.body_decl.code
+                child_nodes = node.nodes
+            elif isinstance(node, parsetree.CallTag):
+                code = node.code.code
+                child_nodes = node.nodes
+            elif isinstance(node, parsetree.PageTag):
+                code = node.body_decl.code
+            elif isinstance(node, parsetree.CallNamespaceTag):
+                code = node.expression
+                child_nodes = node.nodes
+            elif isinstance(node, parsetree.ControlLine):
+                if node.isend:
+                    in_translator_comments = False
+                    continue
+                code = node.text
+            elif isinstance(node, parsetree.Code):
+                in_translator_comments = False
+                code = node.code.code
+            elif isinstance(node, parsetree.Expression):
+                code = node.code.code
+            else:
+                continue
+
+            # Comments don't apply unless they immediately precede the message
+            if (
+                translator_comments
+                and translator_comments[-1][0] < node.lineno - 1
+            ):
+                translator_comments = []
+
+            translator_strings = [
+                comment[1] for comment in translator_comments
+            ]
+
+            # We add extra newline to work around a pybabel bug
+            # (see python-babel/babel#274, parse_encoding dies if the first
+            # input string of the input is non-ascii)
+            # Also, because we added it, we have to subtract one from
+            # node.lineno
+            if self.use_bytes:
+                if isinstance(code, str):
+                    code = code.encode(input_encoding, "backslashreplace")
+                code = BytesIO(b"\n" + code)
+            else:
+                code = StringIO("\n" + code)
+
+            used_translator_comments = False
+            for message in self.process_python(
+                code, node.lineno - 1, translator_strings
+            ):
+                yield message
+                used_translator_comments = True
+
+            # Comments that were not attached to a message stay pending
+            if used_translator_comments:
+                translator_comments = []
+            in_translator_comments = False
+
+            if child_nodes:
+                yield from self.extract_nodes(child_nodes)
+
+    @staticmethod
+    def _split_comment(lineno, comment):
+        """Return ``comment`` as a list of ``(line number, line)`` pairs,
+        numbering its lines consecutively starting from ``lineno``"""
+        return [
+            (lineno + index, line)
+            for index, line in enumerate(comment.splitlines())
+        ]