diff options
Diffstat (limited to 'tools/yapf/yapf/yapflib/pytree_unwrapper.py')
-rw-r--r-- | tools/yapf/yapf/yapflib/pytree_unwrapper.py | 376 |
1 files changed, 376 insertions, 0 deletions
diff --git a/tools/yapf/yapf/yapflib/pytree_unwrapper.py b/tools/yapf/yapf/yapflib/pytree_unwrapper.py new file mode 100644 index 00000000..c67c1c6e --- /dev/null +++ b/tools/yapf/yapf/yapflib/pytree_unwrapper.py @@ -0,0 +1,376 @@ +# Copyright 2015-2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTreeUnwrapper - produces a list of unwrapped lines from a pytree. + +[for a description of what an unwrapped line is, see unwrapped_line.py] + +This is a pytree visitor that goes over a parse tree and produces a list of +UnwrappedLine containers from it, each with its own depth and containing all +the tokens that could fit on the line if there were no maximal line-length +limitations. + +Note: a precondition to running this visitor and obtaining correct results is +for the tree to have its comments spliced in as nodes. Prefixes are ignored. + +For most uses, the convenience function UnwrapPyTree should be sufficient. +""" + +# The word "token" is overloaded within this module, so for clarity rename +# the imported pgen2.token module. +from lib2to3 import pytree +from lib2to3.pgen2 import token as grammar_token + +from yapf.yapflib import pytree_utils +from yapf.yapflib import pytree_visitor +from yapf.yapflib import split_penalty +from yapf.yapflib import unwrapped_line + + +def UnwrapPyTree(tree): + """Create and return a list of unwrapped lines from the given pytree. + + Arguments: + tree: the top-level pytree node to unwrap. + + Returns: + A list of UnwrappedLine objects. + """ + unwrapper = PyTreeUnwrapper() + unwrapper.Visit(tree) + uwlines = unwrapper.GetUnwrappedLines() + uwlines.sort(key=lambda x: x.lineno) + return uwlines + + +# Grammar tokens considered as whitespace for the purpose of unwrapping. +_WHITESPACE_TOKENS = frozenset([ + grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT, + grammar_token.ENDMARKER +]) + + +class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor): + """PyTreeUnwrapper - see file-level docstring for detailed description. + + Note: since this implements PyTreeVisitor and node names in lib2to3 are + underscore_separated, the visiting methods of this class are named as + Visit_node_name. invalid-name pragmas are added to each such method to silence + a style warning. This is forced on us by the usage of lib2to3, and re-munging + method names to make them different from actual node names sounded like a + confusing and brittle affair that wasn't worth it for this small & controlled + deviation from the style guide. + + To understand the connection between visitor methods in this class, some + familiarity with the Python grammar is required. + """ + + def __init__(self): + # A list of all unwrapped lines finished visiting so far. + self._unwrapped_lines = [] + + # Builds up a "current" unwrapped line while visiting pytree nodes. Some + # nodes will finish a line and start a new one. + self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(0) + + # Current indentation depth. + self._cur_depth = 0 + + def GetUnwrappedLines(self): + """Fetch the result of the tree walk. + + Note: only call this after visiting the whole tree. + + Returns: + A list of UnwrappedLine objects. + """ + # Make sure the last line that was being populated is flushed. + self._StartNewLine() + return self._unwrapped_lines + + def _StartNewLine(self): + """Finish current line and start a new one. + + Place the currently accumulated line into the _unwrapped_lines list and + start a new one. + """ + if self._cur_unwrapped_line.tokens: + self._unwrapped_lines.append(self._cur_unwrapped_line) + _MatchBrackets(self._cur_unwrapped_line) + _AdjustSplitPenalty(self._cur_unwrapped_line) + self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(self._cur_depth) + + _STMT_TYPES = frozenset({ + 'if_stmt', + 'while_stmt', + 'for_stmt', + 'try_stmt', + 'expect_clause', + 'with_stmt', + 'funcdef', + 'classdef', + }) + + # pylint: disable=invalid-name,missing-docstring + def Visit_simple_stmt(self, node): + # A 'simple_stmt' conveniently represents a non-compound Python statement, + # i.e. a statement that does not contain other statements. + + # When compound nodes have a single statement as their suite, the parser + # can leave it in the tree directly without creating a suite. But we have + # to increase depth in these cases as well. However, don't increase the + # depth of we have a simple_stmt that's a comment node. This represents a + # standalone comment and in the case of it coming directly after the + # funcdef, it is a "top" comment for the whole function. + # TODO(eliben): add more relevant compound statements here. + single_stmt_suite = (node.parent and + pytree_utils.NodeName(node.parent) in self._STMT_TYPES) + is_comment_stmt = pytree_utils.IsCommentStatement(node) + if single_stmt_suite and not is_comment_stmt: + self._cur_depth += 1 + self._StartNewLine() + self.DefaultNodeVisit(node) + if single_stmt_suite and not is_comment_stmt: + self._cur_depth -= 1 + + def _VisitCompoundStatement(self, node, substatement_names): + """Helper for visiting compound statements. + + Python compound statements serve as containers for other statements. Thus, + when we encounter a new compound statement we start a new unwrapped line. + + Arguments: + node: the node to visit. + substatement_names: set of node names. A compound statement will be + recognized as a NAME node with a name in this set. + """ + for child in node.children: + # A pytree is structured in such a way that a single 'if_stmt' node will + # contain all the 'if', 'elif' and 'else' nodes as children (similar + # structure applies to 'while' statements, 'try' blocks, etc). Therefore, + # we visit all children here and create a new line before the requested + # set of nodes. + if (child.type == grammar_token.NAME and + child.value in substatement_names): + self._StartNewLine() + self.Visit(child) + + _IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'}) + + def Visit_if_stmt(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._IF_STMT_ELEMS) + + _WHILE_STMT_ELEMS = frozenset({'while', 'else'}) + + def Visit_while_stmt(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS) + + _FOR_STMT_ELEMS = frozenset({'for', 'else'}) + + def Visit_for_stmt(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS) + + _TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'}) + + def Visit_try_stmt(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS) + + _EXCEPT_STMT_ELEMS = frozenset({'except'}) + + def Visit_except_clause(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS) + + _FUNC_DEF_ELEMS = frozenset({'def'}) + + def Visit_funcdef(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS) + + def Visit_async_funcdef(self, node): # pylint: disable=invalid-name + self._StartNewLine() + index = 0 + for child in node.children: + index += 1 + self.Visit(child) + if pytree_utils.NodeName(child) == 'ASYNC': + break + for child in node.children[index].children: + self.Visit(child) + + _CLASS_DEF_ELEMS = frozenset({'class'}) + + def Visit_classdef(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._CLASS_DEF_ELEMS) + + def Visit_async_stmt(self, node): # pylint: disable=invalid-name + self._StartNewLine() + index = 0 + for child in node.children: + index += 1 + self.Visit(child) + if pytree_utils.NodeName(child) == 'ASYNC': + break + for child in node.children[index].children: + self.Visit(child) + + def Visit_decorators(self, node): # pylint: disable=invalid-name + for child in node.children: + self._StartNewLine() + self.Visit(child) + + def Visit_decorated(self, node): # pylint: disable=invalid-name + for child in node.children: + self._StartNewLine() + self.Visit(child) + + _WITH_STMT_ELEMS = frozenset({'with'}) + + def Visit_with_stmt(self, node): # pylint: disable=invalid-name + self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS) + + def Visit_suite(self, node): # pylint: disable=invalid-name + # A 'suite' starts a new indentation level in Python. + self._cur_depth += 1 + self._StartNewLine() + self.DefaultNodeVisit(node) + self._cur_depth -= 1 + + def Visit_listmaker(self, node): # pylint: disable=invalid-name + _DetermineMustSplitAnnotation(node) + self.DefaultNodeVisit(node) + + def Visit_dictsetmaker(self, node): # pylint: disable=invalid-name + _DetermineMustSplitAnnotation(node) + self.DefaultNodeVisit(node) + + def Visit_import_as_names(self, node): # pylint: disable=invalid-name + if node.prev_sibling.value == '(': + _DetermineMustSplitAnnotation(node) + self.DefaultNodeVisit(node) + + def Visit_testlist_gexp(self, node): # pylint: disable=invalid-name + if _ContainsComments(node): + _DetermineMustSplitAnnotation(node) + self.DefaultNodeVisit(node) + + def Visit_arglist(self, node): # pylint: disable=invalid-name + _DetermineMustSplitAnnotation(node) + self.DefaultNodeVisit(node) + + def Visit_typedargslist(self, node): # pylint: disable=invalid-name + _DetermineMustSplitAnnotation(node) + self.DefaultNodeVisit(node) + + def DefaultLeafVisit(self, leaf): + """Default visitor for tree leaves. + + A tree leaf is always just gets appended to the current unwrapped line. + + Arguments: + leaf: the leaf to visit. + """ + if leaf.type in _WHITESPACE_TOKENS: + self._StartNewLine() + elif leaf.type != grammar_token.COMMENT or leaf.value.strip(): + if leaf.value == ';': + # Split up multiple statements on one line. + self._StartNewLine() + else: + # Add non-whitespace tokens and comments that aren't empty. + self._cur_unwrapped_line.AppendNode(leaf) + + +_BRACKET_MATCH = {')': '(', '}': '{', ']': '['} + + +def _MatchBrackets(uwline): + """Visit the node and match the brackets. + + For every open bracket ('[', '{', or '('), find the associated closing bracket + and "match" them up. I.e., save in the token a pointer to its associated open + or close bracket. + + Arguments: + uwline: (UnwrappedLine) An unwrapped line. + """ + bracket_stack = [] + for token in uwline.tokens: + if token.value in pytree_utils.OPENING_BRACKETS: + bracket_stack.append(token) + elif token.value in pytree_utils.CLOSING_BRACKETS: + bracket_stack[-1].matching_bracket = token + token.matching_bracket = bracket_stack[-1] + bracket_stack.pop() + + +def _AdjustSplitPenalty(uwline): + """Visit the node and adjust the split penalties if needed. + + A token shouldn't be split if it's not within a bracket pair. Mark any token + that's not within a bracket pair as "unbreakable". + + Arguments: + uwline: (UnwrappedLine) An unwrapped line. + """ + bracket_level = 0 + for index, token in enumerate(uwline.tokens): + if index and not bracket_level: + pytree_utils.SetNodeAnnotation(token.node, + pytree_utils.Annotation.SPLIT_PENALTY, + split_penalty.UNBREAKABLE) + if token.value in pytree_utils.OPENING_BRACKETS: + bracket_level += 1 + elif token.value in pytree_utils.CLOSING_BRACKETS: + bracket_level -= 1 + + +def _DetermineMustSplitAnnotation(node): + """Enforce a split in the list if the list ends with a comma.""" + if not _ContainsComments(node): + if (not isinstance(node.children[-1], pytree.Leaf) or + node.children[-1].value != ','): + return + num_children = len(node.children) + index = 0 + _SetMustSplitOnFirstLeaf(node.children[0]) + while index < num_children - 1: + child = node.children[index] + if isinstance(child, pytree.Leaf) and child.value == ',': + next_child = node.children[index + 1] + if next_child.type == grammar_token.COMMENT: + index += 1 + if index >= num_children - 1: + break + _SetMustSplitOnFirstLeaf(node.children[index + 1]) + index += 1 + + +def _ContainsComments(node): + """Return True if the list has a comment in it.""" + if isinstance(node, pytree.Leaf): + return node.type == grammar_token.COMMENT + for child in node.children: + if _ContainsComments(child): + return True + return False + + +def _SetMustSplitOnFirstLeaf(node): + """Set the "must split" annotation on the first leaf node.""" + + def FindFirstLeaf(node): + if isinstance(node, pytree.Leaf): + return node + return FindFirstLeaf(node.children[0]) + + pytree_utils.SetNodeAnnotation( + FindFirstLeaf(node), pytree_utils.Annotation.MUST_SPLIT, True) |