summaryrefslogtreecommitdiff
path: root/tools/yapf/yapf/yapflib/pytree_unwrapper.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/yapf/yapf/yapflib/pytree_unwrapper.py')
-rw-r--r--tools/yapf/yapf/yapflib/pytree_unwrapper.py376
1 files changed, 376 insertions, 0 deletions
diff --git a/tools/yapf/yapf/yapflib/pytree_unwrapper.py b/tools/yapf/yapf/yapflib/pytree_unwrapper.py
new file mode 100644
index 00000000..c67c1c6e
--- /dev/null
+++ b/tools/yapf/yapf/yapflib/pytree_unwrapper.py
@@ -0,0 +1,376 @@
+# Copyright 2015-2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTreeUnwrapper - produces a list of unwrapped lines from a pytree.
+
+[for a description of what an unwrapped line is, see unwrapped_line.py]
+
+This is a pytree visitor that goes over a parse tree and produces a list of
+UnwrappedLine containers from it, each with its own depth and containing all
+the tokens that could fit on the line if there were no maximal line-length
+limitations.
+
+Note: a precondition to running this visitor and obtaining correct results is
+for the tree to have its comments spliced in as nodes. Prefixes are ignored.
+
+For most uses, the convenience function UnwrapPyTree should be sufficient.
+"""
+
+# The word "token" is overloaded within this module, so for clarity rename
+# the imported pgen2.token module.
+from lib2to3 import pytree
+from lib2to3.pgen2 import token as grammar_token
+
+from yapf.yapflib import pytree_utils
+from yapf.yapflib import pytree_visitor
+from yapf.yapflib import split_penalty
+from yapf.yapflib import unwrapped_line
+
+
+def UnwrapPyTree(tree):
+ """Create and return a list of unwrapped lines from the given pytree.
+
+ Arguments:
+ tree: the top-level pytree node to unwrap.
+
+ Returns:
+ A list of UnwrappedLine objects.
+ """
+ unwrapper = PyTreeUnwrapper()
+ unwrapper.Visit(tree)
+ uwlines = unwrapper.GetUnwrappedLines()
+ uwlines.sort(key=lambda x: x.lineno)
+ return uwlines
+
+
+# Grammar tokens considered as whitespace for the purpose of unwrapping.
+_WHITESPACE_TOKENS = frozenset([
+ grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT,
+ grammar_token.ENDMARKER
+])
+
+
+class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor):
+ """PyTreeUnwrapper - see file-level docstring for detailed description.
+
+ Note: since this implements PyTreeVisitor and node names in lib2to3 are
+ underscore_separated, the visiting methods of this class are named as
+ Visit_node_name. invalid-name pragmas are added to each such method to silence
+ a style warning. This is forced on us by the usage of lib2to3, and re-munging
+ method names to make them different from actual node names sounded like a
+ confusing and brittle affair that wasn't worth it for this small & controlled
+ deviation from the style guide.
+
+ To understand the connection between visitor methods in this class, some
+ familiarity with the Python grammar is required.
+ """
+
+ def __init__(self):
+ # A list of all unwrapped lines finished visiting so far.
+ self._unwrapped_lines = []
+
+ # Builds up a "current" unwrapped line while visiting pytree nodes. Some
+ # nodes will finish a line and start a new one.
+ self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(0)
+
+ # Current indentation depth.
+ self._cur_depth = 0
+
+ def GetUnwrappedLines(self):
+ """Fetch the result of the tree walk.
+
+ Note: only call this after visiting the whole tree.
+
+ Returns:
+ A list of UnwrappedLine objects.
+ """
+ # Make sure the last line that was being populated is flushed.
+ self._StartNewLine()
+ return self._unwrapped_lines
+
+ def _StartNewLine(self):
+ """Finish current line and start a new one.
+
+ Place the currently accumulated line into the _unwrapped_lines list and
+ start a new one.
+ """
+ if self._cur_unwrapped_line.tokens:
+ self._unwrapped_lines.append(self._cur_unwrapped_line)
+ _MatchBrackets(self._cur_unwrapped_line)
+ _AdjustSplitPenalty(self._cur_unwrapped_line)
+ self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(self._cur_depth)
+
+ _STMT_TYPES = frozenset({
+ 'if_stmt',
+ 'while_stmt',
+ 'for_stmt',
+ 'try_stmt',
+ 'expect_clause',
+ 'with_stmt',
+ 'funcdef',
+ 'classdef',
+ })
+
+ # pylint: disable=invalid-name,missing-docstring
+ def Visit_simple_stmt(self, node):
+ # A 'simple_stmt' conveniently represents a non-compound Python statement,
+ # i.e. a statement that does not contain other statements.
+
+ # When compound nodes have a single statement as their suite, the parser
+ # can leave it in the tree directly without creating a suite. But we have
+ # to increase depth in these cases as well. However, don't increase the
+ # depth of we have a simple_stmt that's a comment node. This represents a
+ # standalone comment and in the case of it coming directly after the
+ # funcdef, it is a "top" comment for the whole function.
+ # TODO(eliben): add more relevant compound statements here.
+ single_stmt_suite = (node.parent and
+ pytree_utils.NodeName(node.parent) in self._STMT_TYPES)
+ is_comment_stmt = pytree_utils.IsCommentStatement(node)
+ if single_stmt_suite and not is_comment_stmt:
+ self._cur_depth += 1
+ self._StartNewLine()
+ self.DefaultNodeVisit(node)
+ if single_stmt_suite and not is_comment_stmt:
+ self._cur_depth -= 1
+
+ def _VisitCompoundStatement(self, node, substatement_names):
+ """Helper for visiting compound statements.
+
+ Python compound statements serve as containers for other statements. Thus,
+ when we encounter a new compound statement we start a new unwrapped line.
+
+ Arguments:
+ node: the node to visit.
+ substatement_names: set of node names. A compound statement will be
+ recognized as a NAME node with a name in this set.
+ """
+ for child in node.children:
+ # A pytree is structured in such a way that a single 'if_stmt' node will
+ # contain all the 'if', 'elif' and 'else' nodes as children (similar
+ # structure applies to 'while' statements, 'try' blocks, etc). Therefore,
+ # we visit all children here and create a new line before the requested
+ # set of nodes.
+ if (child.type == grammar_token.NAME and
+ child.value in substatement_names):
+ self._StartNewLine()
+ self.Visit(child)
+
+ _IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'})
+
+ def Visit_if_stmt(self, node): # pylint: disable=invalid-name
+ self._VisitCompoundStatement(node, self._IF_STMT_ELEMS)
+
+ _WHILE_STMT_ELEMS = frozenset({'while', 'else'})
+
+ def Visit_while_stmt(self, node): # pylint: disable=invalid-name
+ self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS)
+
+ _FOR_STMT_ELEMS = frozenset({'for', 'else'})
+
+ def Visit_for_stmt(self, node): # pylint: disable=invalid-name
+ self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS)
+
+ _TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'})
+
+ def Visit_try_stmt(self, node): # pylint: disable=invalid-name
+ self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS)
+
+ _EXCEPT_STMT_ELEMS = frozenset({'except'})
+
+ def Visit_except_clause(self, node): # pylint: disable=invalid-name
+ self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS)
+
+ _FUNC_DEF_ELEMS = frozenset({'def'})
+
+ def Visit_funcdef(self, node): # pylint: disable=invalid-name
+ self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS)
+
+ def Visit_async_funcdef(self, node): # pylint: disable=invalid-name
+ self._StartNewLine()
+ index = 0
+ for child in node.children:
+ index += 1
+ self.Visit(child)
+ if pytree_utils.NodeName(child) == 'ASYNC':
+ break
+ for child in node.children[index].children:
+ self.Visit(child)
+
+ _CLASS_DEF_ELEMS = frozenset({'class'})
+
+ def Visit_classdef(self, node): # pylint: disable=invalid-name
+ self._VisitCompoundStatement(node, self._CLASS_DEF_ELEMS)
+
+ def Visit_async_stmt(self, node): # pylint: disable=invalid-name
+ self._StartNewLine()
+ index = 0
+ for child in node.children:
+ index += 1
+ self.Visit(child)
+ if pytree_utils.NodeName(child) == 'ASYNC':
+ break
+ for child in node.children[index].children:
+ self.Visit(child)
+
+ def Visit_decorators(self, node): # pylint: disable=invalid-name
+ for child in node.children:
+ self._StartNewLine()
+ self.Visit(child)
+
+ def Visit_decorated(self, node): # pylint: disable=invalid-name
+ for child in node.children:
+ self._StartNewLine()
+ self.Visit(child)
+
+ _WITH_STMT_ELEMS = frozenset({'with'})
+
+ def Visit_with_stmt(self, node): # pylint: disable=invalid-name
+ self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS)
+
+ def Visit_suite(self, node): # pylint: disable=invalid-name
+ # A 'suite' starts a new indentation level in Python.
+ self._cur_depth += 1
+ self._StartNewLine()
+ self.DefaultNodeVisit(node)
+ self._cur_depth -= 1
+
+ def Visit_listmaker(self, node): # pylint: disable=invalid-name
+ _DetermineMustSplitAnnotation(node)
+ self.DefaultNodeVisit(node)
+
+ def Visit_dictsetmaker(self, node): # pylint: disable=invalid-name
+ _DetermineMustSplitAnnotation(node)
+ self.DefaultNodeVisit(node)
+
+ def Visit_import_as_names(self, node): # pylint: disable=invalid-name
+ if node.prev_sibling.value == '(':
+ _DetermineMustSplitAnnotation(node)
+ self.DefaultNodeVisit(node)
+
+ def Visit_testlist_gexp(self, node): # pylint: disable=invalid-name
+ if _ContainsComments(node):
+ _DetermineMustSplitAnnotation(node)
+ self.DefaultNodeVisit(node)
+
+ def Visit_arglist(self, node): # pylint: disable=invalid-name
+ _DetermineMustSplitAnnotation(node)
+ self.DefaultNodeVisit(node)
+
+ def Visit_typedargslist(self, node): # pylint: disable=invalid-name
+ _DetermineMustSplitAnnotation(node)
+ self.DefaultNodeVisit(node)
+
+ def DefaultLeafVisit(self, leaf):
+ """Default visitor for tree leaves.
+
+ A tree leaf is always just gets appended to the current unwrapped line.
+
+ Arguments:
+ leaf: the leaf to visit.
+ """
+ if leaf.type in _WHITESPACE_TOKENS:
+ self._StartNewLine()
+ elif leaf.type != grammar_token.COMMENT or leaf.value.strip():
+ if leaf.value == ';':
+ # Split up multiple statements on one line.
+ self._StartNewLine()
+ else:
+ # Add non-whitespace tokens and comments that aren't empty.
+ self._cur_unwrapped_line.AppendNode(leaf)
+
+
+_BRACKET_MATCH = {')': '(', '}': '{', ']': '['}
+
+
+def _MatchBrackets(uwline):
+ """Visit the node and match the brackets.
+
+ For every open bracket ('[', '{', or '('), find the associated closing bracket
+ and "match" them up. I.e., save in the token a pointer to its associated open
+ or close bracket.
+
+ Arguments:
+ uwline: (UnwrappedLine) An unwrapped line.
+ """
+ bracket_stack = []
+ for token in uwline.tokens:
+ if token.value in pytree_utils.OPENING_BRACKETS:
+ bracket_stack.append(token)
+ elif token.value in pytree_utils.CLOSING_BRACKETS:
+ bracket_stack[-1].matching_bracket = token
+ token.matching_bracket = bracket_stack[-1]
+ bracket_stack.pop()
+
+
+def _AdjustSplitPenalty(uwline):
+ """Visit the node and adjust the split penalties if needed.
+
+ A token shouldn't be split if it's not within a bracket pair. Mark any token
+ that's not within a bracket pair as "unbreakable".
+
+ Arguments:
+ uwline: (UnwrappedLine) An unwrapped line.
+ """
+ bracket_level = 0
+ for index, token in enumerate(uwline.tokens):
+ if index and not bracket_level:
+ pytree_utils.SetNodeAnnotation(token.node,
+ pytree_utils.Annotation.SPLIT_PENALTY,
+ split_penalty.UNBREAKABLE)
+ if token.value in pytree_utils.OPENING_BRACKETS:
+ bracket_level += 1
+ elif token.value in pytree_utils.CLOSING_BRACKETS:
+ bracket_level -= 1
+
+
+def _DetermineMustSplitAnnotation(node):
+ """Enforce a split in the list if the list ends with a comma."""
+ if not _ContainsComments(node):
+ if (not isinstance(node.children[-1], pytree.Leaf) or
+ node.children[-1].value != ','):
+ return
+ num_children = len(node.children)
+ index = 0
+ _SetMustSplitOnFirstLeaf(node.children[0])
+ while index < num_children - 1:
+ child = node.children[index]
+ if isinstance(child, pytree.Leaf) and child.value == ',':
+ next_child = node.children[index + 1]
+ if next_child.type == grammar_token.COMMENT:
+ index += 1
+ if index >= num_children - 1:
+ break
+ _SetMustSplitOnFirstLeaf(node.children[index + 1])
+ index += 1
+
+
+def _ContainsComments(node):
+ """Return True if the list has a comment in it."""
+ if isinstance(node, pytree.Leaf):
+ return node.type == grammar_token.COMMENT
+ for child in node.children:
+ if _ContainsComments(child):
+ return True
+ return False
+
+
+def _SetMustSplitOnFirstLeaf(node):
+ """Set the "must split" annotation on the first leaf node."""
+
+ def FindFirstLeaf(node):
+ if isinstance(node, pytree.Leaf):
+ return node
+ return FindFirstLeaf(node.children[0])
+
+ pytree_utils.SetNodeAnnotation(
+ FindFirstLeaf(node), pytree_utils.Annotation.MUST_SPLIT, True)