1 files changed, 376 insertions, 0 deletions
diff --git a/tools/yapf/yapf/yapflib/pytree_unwrapper.py b/tools/yapf/yapf/yapflib/pytree_unwrapper.py
new file mode 100644
index 00000000..c67c1c6e
--- /dev/null
+++ b/tools/yapf/yapf/yapflib/pytree_unwrapper.py
@@ -0,0 +1,376 @@
+# Copyright 2015-2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""PyTreeUnwrapper - produces a list of unwrapped lines from a pytree.
+
+[for a description of what an unwrapped line is, see unwrapped_line.py]
+
+This is a pytree visitor that goes over a parse tree and produces a list of
+UnwrappedLine containers from it, each with its own depth and containing all
+the tokens that could fit on the line if there were no maximal line-length
+limitations.
+
+Note: a precondition to running this visitor and obtaining correct results is
+for the tree to have its comments spliced in as nodes. Prefixes are ignored.
+
+For most uses, the convenience function UnwrapPyTree should be sufficient.
+"""
+
+# The word "token" is overloaded within this module, so for clarity rename
+# the imported pgen2.token module.
+from lib2to3 import pytree
+from lib2to3.pgen2 import token as grammar_token
+
+from yapf.yapflib import pytree_utils
+from yapf.yapflib import pytree_visitor
+from yapf.yapflib import split_penalty
+from yapf.yapflib import unwrapped_line
+
+
+def UnwrapPyTree(tree):
+  """Create and return a list of unwrapped lines from the given pytree.
+
+  Arguments:
+    tree: the top-level pytree node to unwrap.
+
+  Returns:
+    A list of UnwrappedLine objects.
+  """
+  unwrapper = PyTreeUnwrapper()
+  unwrapper.Visit(tree)
+  uwlines = unwrapper.GetUnwrappedLines()
+  uwlines.sort(key=lambda x: x.lineno)
+  return uwlines
+
+
+# Grammar tokens considered as whitespace for the purpose of unwrapping.
+_WHITESPACE_TOKENS = frozenset([
+    grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT,
+    grammar_token.ENDMARKER
+])
+
+
+class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor):
+  """PyTreeUnwrapper - see file-level docstring for detailed description.
+
+  Note: since this implements PyTreeVisitor and node names in lib2to3 are
+  underscore_separated, the visiting methods of this class are named as
+  Visit_node_name. invalid-name pragmas are added to each such method to silence
+  a style warning. This is forced on us by the usage of lib2to3, and re-munging
+  method names to make them different from actual node names sounded like a
+  confusing and brittle affair that wasn't worth it for this small & controlled
+  deviation from the style guide.
+
+  To understand the connection between visitor methods in this class, some
+  familiarity with the Python grammar is required.
+  """
+
+  def __init__(self):
+    # A list of all unwrapped lines finished visiting so far.
+    self._unwrapped_lines = []
+
+    # Builds up a "current" unwrapped line while visiting pytree nodes. Some
+    # nodes will finish a line and start a new one.
+    self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(0)
+
+    # Current indentation depth.
+    self._cur_depth = 0
+
+  def GetUnwrappedLines(self):
+    """Fetch the result of the tree walk.
+
+    Note: only call this after visiting the whole tree.
+
+    Returns:
+      A list of UnwrappedLine objects.
+    """
+    # Make sure the last line that was being populated is flushed.
+    self._StartNewLine()
+    return self._unwrapped_lines
+
+  def _StartNewLine(self):
+    """Finish current line and start a new one.
+
+    Place the currently accumulated line into the _unwrapped_lines list and
+    start a new one.
+    """
+    if self._cur_unwrapped_line.tokens:
+      self._unwrapped_lines.append(self._cur_unwrapped_line)
+      _MatchBrackets(self._cur_unwrapped_line)
+      _AdjustSplitPenalty(self._cur_unwrapped_line)
+    self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(self._cur_depth)
+
+  _STMT_TYPES = frozenset({
+      'if_stmt',
+      'while_stmt',
+      'for_stmt',
+      'try_stmt',
+      'expect_clause',
+      'with_stmt',
+      'funcdef',
+      'classdef',
+  })
+
+  # pylint: disable=invalid-name,missing-docstring
+  def Visit_simple_stmt(self, node):
+    # A 'simple_stmt' conveniently represents a non-compound Python statement,
+    # i.e. a statement that does not contain other statements.
+
+    # When compound nodes have a single statement as their suite, the parser
+    # can leave it in the tree directly without creating a suite. But we have
+    # to increase depth in these cases as well. However, don't increase the
+    # depth of we have a simple_stmt that's a comment node. This represents a
+    # standalone comment and in the case of it coming directly after the
+    # funcdef, it is a "top" comment for the whole function.
+    # TODO(eliben): add more relevant compound statements here.
+    single_stmt_suite = (node.parent and
+                         pytree_utils.NodeName(node.parent) in self._STMT_TYPES)
+    is_comment_stmt = pytree_utils.IsCommentStatement(node)
+    if single_stmt_suite and not is_comment_stmt:
+      self._cur_depth += 1
+    self._StartNewLine()
+    self.DefaultNodeVisit(node)
+    if single_stmt_suite and not is_comment_stmt:
+      self._cur_depth -= 1
+
+  def _VisitCompoundStatement(self, node, substatement_names):
+    """Helper for visiting compound statements.
+
+    Python compound statements serve as containers for other statements. Thus,
+    when we encounter a new compound statement we start a new unwrapped line.
+
+    Arguments:
+      node: the node to visit.
+      substatement_names: set of node names. A compound statement will be
+        recognized as a NAME node with a name in this set.
+    """
+    for child in node.children:
+      # A pytree is structured in such a way that a single 'if_stmt' node will
+      # contain all the 'if', 'elif' and 'else' nodes as children (similar
+      # structure applies to 'while' statements, 'try' blocks, etc). Therefore,
+      # we visit all children here and create a new line before the requested
+      # set of nodes.
+      if (child.type == grammar_token.NAME and
+          child.value in substatement_names):
+        self._StartNewLine()
+      self.Visit(child)
+
+  _IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'})
+
+  def Visit_if_stmt(self, node):  # pylint: disable=invalid-name
+    self._VisitCompoundStatement(node, self._IF_STMT_ELEMS)
+
+  _WHILE_STMT_ELEMS = frozenset({'while', 'else'})
+
+  def Visit_while_stmt(self, node):  # pylint: disable=invalid-name
+    self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS)
+
+  _FOR_STMT_ELEMS = frozenset({'for', 'else'})
+
+  def Visit_for_stmt(self, node):  # pylint: disable=invalid-name
+    self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS)
+
+  _TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'})
+
+  def Visit_try_stmt(self, node):  # pylint: disable=invalid-name
+    self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS)
+
+  _EXCEPT_STMT_ELEMS = frozenset({'except'})
+
+  def Visit_except_clause(self, node):  # pylint: disable=invalid-name
+    self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS)
+
+  _FUNC_DEF_ELEMS = frozenset({'def'})
+
+  def Visit_funcdef(self, node):  # pylint: disable=invalid-name
+    self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS)
+
+  def Visit_async_funcdef(self, node):  # pylint: disable=invalid-name
+    self._StartNewLine()
+    index = 0
+    for child in node.children:
+      index += 1
+      self.Visit(child)
+      if pytree_utils.NodeName(child) == 'ASYNC':
+        break
+    for child in node.children[index].children:
+      self.Visit(child)
+
+  _CLASS_DEF_ELEMS = frozenset({'class'})
+
+  def Visit_classdef(self, node):  # pylint: disable=invalid-name
+    self._VisitCompoundStatement(node, self._CLASS_DEF_ELEMS)
+
+  def Visit_async_stmt(self, node):  # pylint: disable=invalid-name
+    self._StartNewLine()
+    index = 0
+    for child in node.children:
+      index += 1
+      self.Visit(child)
+      if pytree_utils.NodeName(child) == 'ASYNC':
+        break
+    for child in node.children[index].children:
+      self.Visit(child)
+
+  def Visit_decorators(self, node):  # pylint: disable=invalid-name
+    for child in node.children:
+      self._StartNewLine()
+      self.Visit(child)
+
+  def Visit_decorated(self, node):  # pylint: disable=invalid-name
+    for child in node.children:
+      self._StartNewLine()
+      self.Visit(child)
+
+  _WITH_STMT_ELEMS = frozenset({'with'})
+
+  def Visit_with_stmt(self, node):  # pylint: disable=invalid-name
+    self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS)
+
+  def Visit_suite(self, node):  # pylint: disable=invalid-name
+    # A 'suite' starts a new indentation level in Python.
+    self._cur_depth += 1
+    self._StartNewLine()
+    self.DefaultNodeVisit(node)
+    self._cur_depth -= 1
+
+  def Visit_listmaker(self, node):  # pylint: disable=invalid-name
+    _DetermineMustSplitAnnotation(node)
+    self.DefaultNodeVisit(node)
+
+  def Visit_dictsetmaker(self, node):  # pylint: disable=invalid-name
+    _DetermineMustSplitAnnotation(node)
+    self.DefaultNodeVisit(node)
+
+  def Visit_import_as_names(self, node):  # pylint: disable=invalid-name
+    if node.prev_sibling.value == '(':
+      _DetermineMustSplitAnnotation(node)
+    self.DefaultNodeVisit(node)
+
+  def Visit_testlist_gexp(self, node):  # pylint: disable=invalid-name
+    if _ContainsComments(node):
+      _DetermineMustSplitAnnotation(node)
+    self.DefaultNodeVisit(node)
+
+  def Visit_arglist(self, node):  # pylint: disable=invalid-name
+    _DetermineMustSplitAnnotation(node)
+    self.DefaultNodeVisit(node)
+
+  def Visit_typedargslist(self, node):  # pylint: disable=invalid-name
+    _DetermineMustSplitAnnotation(node)
+    self.DefaultNodeVisit(node)
+
+  def DefaultLeafVisit(self, leaf):
+    """Default visitor for tree leaves.
+
+    A tree leaf is always just gets appended to the current unwrapped line.
+
+    Arguments:
+      leaf: the leaf to visit.
+    """
+    if leaf.type in _WHITESPACE_TOKENS:
+      self._StartNewLine()
+    elif leaf.type != grammar_token.COMMENT or leaf.value.strip():
+      if leaf.value == ';':
+        # Split up multiple statements on one line.
+        self._StartNewLine()
+      else:
+        # Add non-whitespace tokens and comments that aren't empty.
+        self._cur_unwrapped_line.AppendNode(leaf)
+
+
+_BRACKET_MATCH = {')': '(', '}': '{', ']': '['}
+
+
+def _MatchBrackets(uwline):
+  """Visit the node and match the brackets.
+
+  For every open bracket ('[', '{', or '('), find the associated closing bracket
+  and "match" them up. I.e., save in the token a pointer to its associated open
+  or close bracket.
+
+  Arguments:
+    uwline: (UnwrappedLine) An unwrapped line.
+  """
+  bracket_stack = []
+  for token in uwline.tokens:
+    if token.value in pytree_utils.OPENING_BRACKETS:
+      bracket_stack.append(token)
+    elif token.value in pytree_utils.CLOSING_BRACKETS:
+      bracket_stack[-1].matching_bracket = token
+      token.matching_bracket = bracket_stack[-1]
+      bracket_stack.pop()
+
+
+def _AdjustSplitPenalty(uwline):
+  """Visit the node and adjust the split penalties if needed.
+
+  A token shouldn't be split if it's not within a bracket pair. Mark any token
+  that's not within a bracket pair as "unbreakable".
+
+  Arguments:
+    uwline: (UnwrappedLine) An unwrapped line.
+  """
+  bracket_level = 0
+  for index, token in enumerate(uwline.tokens):
+    if index and not bracket_level:
+      pytree_utils.SetNodeAnnotation(token.node,
+                                     pytree_utils.Annotation.SPLIT_PENALTY,
+                                     split_penalty.UNBREAKABLE)
+    if token.value in pytree_utils.OPENING_BRACKETS:
+      bracket_level += 1
+    elif token.value in pytree_utils.CLOSING_BRACKETS:
+      bracket_level -= 1
+
+
+def _DetermineMustSplitAnnotation(node):
+  """Enforce a split in the list if the list ends with a comma."""
+  if not _ContainsComments(node):
+    if (not isinstance(node.children[-1], pytree.Leaf) or
+        node.children[-1].value != ','):
+      return
+  num_children = len(node.children)
+  index = 0
+  _SetMustSplitOnFirstLeaf(node.children[0])
+  while index < num_children - 1:
+    child = node.children[index]
+    if isinstance(child, pytree.Leaf) and child.value == ',':
+      next_child = node.children[index + 1]
+      if next_child.type == grammar_token.COMMENT:
+        index += 1
+        if index >= num_children - 1:
+          break
+      _SetMustSplitOnFirstLeaf(node.children[index + 1])
+    index += 1
+
+
+def _ContainsComments(node):
+  """Return True if the list has a comment in it."""
+  if isinstance(node, pytree.Leaf):
+    return node.type == grammar_token.COMMENT
+  for child in node.children:
+    if _ContainsComments(child):
+      return True
+  return False
+
+
+def _SetMustSplitOnFirstLeaf(node):
+  """Set the "must split" annotation on the first leaf node."""
+
+  def FindFirstLeaf(node):
+    if isinstance(node, pytree.Leaf):
+      return node
+    return FindFirstLeaf(node.children[0])
+
+  pytree_utils.SetNodeAnnotation(
+      FindFirstLeaf(node), pytree_utils.Annotation.MUST_SPLIT, True)