summaryrefslogtreecommitdiff
path: root/cbuildbot/swarming_lib.py
blob: 673b11af6b2a8e7c6400f5e3fc52982286fc9ef3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# Copyright 2015 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Utilities for running commands via swarming instance."""

from __future__ import print_function


import json
import os

from chromite.lib import cros_build_lib
from chromite.lib import cros_logging as logging
from chromite.lib import osutils
from chromite.lib import retry_util

# Absolute path of the directory containing this module.
_DIR_NAME = os.path.dirname(os.path.abspath(__file__))
# Absolute path of the swarming client script (swarming.py) that is invoked
# to send swarming requests; it lives under third_party/, one level above
# this module's directory.
_SWARMING_PROXY_CLIENT = os.path.abspath(
    os.path.join(_DIR_NAME, os.pardir, 'third_party', 'swarming.client',
                 'swarming.py'))
# Connection type identifiers.
CONNECTION_TYPE_COMMON = 'common'
CONNECTION_TYPE_MOCK = 'mock'
# Shard states for which an internal failure is considered retriable.
# Code 80 - bot died.
RETRIABLE_INTERNAL_FAILURE_STATES = {80}


def RunSwarmingCommand(cmd, swarming_server, task_name=None,
                       dimension=None,
                       print_status_updates=False,
                       timeout_secs=None, io_timeout_secs=None,
                       hard_timeout_secs=None, expiration_secs=None,
                       temp_json_path=None,
                       *args, **kwargs):
  """Send a command to a swarming server through the swarming client.

  The swarming client is asked to write a task summary json file; that
  summary is loaded and attached to the result that is returned (or to the
  result carried by the exception that is raised).

  Args:
    cmd: Commands to run, represented as a list.
    swarming_server: The swarming server to send request to.
    task_name: String, represent a task.
    dimension: A tuple with two elements, representing dimension for
               selecting a swarming bots. E.g. ('os', 'Linux')
    print_status_updates: Boolean, whether to output status updates,
                          can be used to prevent from hitting
                          buildbot silent timeout.
    timeout_secs: Timeout to wait for result used by swarming client.
    io_timeout_secs: Seconds to allow the task to be silent.
    hard_timeout_secs: Seconds to allow the task to complete.
    expiration_secs: Seconds to allow the task to be pending for a bot to
                     run before this task request expires.
    temp_json_path: Where swarming client should dump the result. When
                    None, a file inside a temporary directory is used.
    *args: Extra positional arguments forwarded to
           cros_build_lib.RunCommand.
    **kwargs: Extra keyword arguments forwarded to
              cros_build_lib.RunCommand.

  Returns:
    A SwarmingCommandResult object.

  Raises:
    cros_build_lib.RunCommandError: When cros_build_lib.RunCommand raises
      it. The re-raised error carries a SwarmingCommandResult as its result.
  """
  with osutils.TempDir() as scratch_dir:
    # The summary must be read before the temporary directory is removed,
    # so everything below stays inside this context.
    if temp_json_path is None:
      temp_json_path = os.path.join(scratch_dir, 'temp_summary.json')

    full_cmd = [_SWARMING_PROXY_CLIENT, 'run']
    full_cmd.extend(['--swarming', swarming_server])
    full_cmd.extend(['--task-summary-json', temp_json_path])
    full_cmd.append('--raw-cmd')

    if task_name:
      full_cmd.extend(['--task-name', task_name])
    if dimension:
      full_cmd.extend(['--dimension', dimension[0], dimension[1]])
    if print_status_updates:
      full_cmd.append('--print-status-updates')

    # Flags that take a value and are only emitted when a value was given.
    valued_flags = (
        ('--timeout', timeout_secs),
        ('--io-timeout', io_timeout_secs),
        ('--hard-timeout', hard_timeout_secs),
        ('--expiration', expiration_secs),
    )
    for flag, value in valued_flags:
      if value is not None:
        full_cmd.extend([flag, str(value)])

    # Everything after '--' is the command to run.
    full_cmd.append('--')
    full_cmd.extend(cmd)

    try:
      plain_result = cros_build_lib.RunCommand(full_cmd, *args, **kwargs)
      return SwarmingCommandResult.CreateSwarmingCommandResult(
          task_summary_json_path=temp_json_path,
          command_result=plain_result)
    except cros_build_lib.RunCommandError as e:
      # Re-raise with a result that also carries the task summary.
      swarming_result = SwarmingCommandResult.CreateSwarmingCommandResult(
          task_summary_json_path=temp_json_path, command_result=e.result)
      raise cros_build_lib.RunCommandError(e.msg, swarming_result,
                                           e.exception)


def SwarmingRetriableErrorCheck(exception):
  """Check if a swarming error is retriable.

  An error is retriable when it is a cros_build_lib.RunCommandError whose
  result is a SwarmingCommandResult, and the first shard of the task summary
  reports an internal failure with a state listed in
  RETRIABLE_INTERNAL_FAILURE_STATES.

  Args:
    exception: A cros_build_lib.RunCommandError exception.

  Returns:
    True if retriable, otherwise False. A summary that is missing, or that
    does not have the expected layout, is never retriable.
  """
  if not isinstance(exception, cros_build_lib.RunCommandError):
    return False
  result = exception.result
  if not isinstance(result, SwarmingCommandResult):
    return False
  summary = result.task_summary_json
  if not summary:
    return False

  try:
    first_shard = summary['shards'][0]
    internal_failure = first_shard['internal_failure']
    state = first_shard['state']
  except (IndexError, KeyError, TypeError) as e:
    # TypeError covers a summary whose 'shards' value or first shard entry
    # is None (or otherwise not subscriptable); without it the check itself
    # would raise and hide the original command error.
    logging.warning(
        "Could not determine if %s is retriable, error: %s. json: %s",
        str(exception), str(e),
        json.dumps(summary, indent=2))
    return False

  if internal_failure and state in RETRIABLE_INTERNAL_FAILURE_STATES:
    logging.warning(
        'Encountered retriable swarming internal failure: %s',
        json.dumps(summary, indent=2))
    return True
  return False


def RunSwarmingCommandWithRetries(max_retry, *args, **kwargs):
  """Run RunSwarmingCommand through retry_util.RetryCommand.

  Args:
    max_retry: See RetryCommand.
    *args: See RetryCommand and RunSwarmingCommand.
    **kwargs: See RetryCommand and RunSwarmingCommand.

  Returns:
    A SwarmingCommandResult object.

  Raises:
    RunCommandError: When the command fails.
  """
  swarming_result = retry_util.RetryCommand(
      RunSwarmingCommand, max_retry, *args, **kwargs)
  return swarming_result


class SwarmingCommandResult(cros_build_lib.CommandResult):
  """A CommandResult that also carries the swarming task summary.

  Args:
    task_summary_json: A dictionary, loaded from the json file
                       output by swarming client. It contains all
                       details about the swarming task.
  """

  def __init__(self, task_summary_json, *args, **kwargs):
    """Initialize the result and attach the task summary.

    Args:
      task_summary_json: The loaded task summary to store on this object.
      *args: Positional arguments passed on to the parent class.
      **kwargs: Keyword arguments passed on to the parent class.
    """
    super(SwarmingCommandResult, self).__init__(*args, **kwargs)
    self.task_summary_json = task_summary_json

  @staticmethod
  def LoadJsonSummary(task_summary_json_path):
    """Parse the task summary json file.

    Args:
      task_summary_json_path: A json that contains output of a swarming task.

    Returns:
      The parsed json content, or None if task_summary_json_path doesn't
      exist.
    """
    if not os.path.exists(task_summary_json_path):
      return None
    with open(task_summary_json_path) as summary_file:
      return json.load(summary_file)

  @staticmethod
  def CreateSwarmingCommandResult(task_summary_json_path, command_result):
    """Build a SwarmingCommandResult out of a plain CommandResult.

    Args:
      task_summary_json_path: The path to a json file that contains
                              output of a swarming task.
      command_result: A CommandResult object.

    Returns:
      A SwarmingCommandResult object holding the fields copied from
      command_result plus the loaded task summary.
    """
    summary = SwarmingCommandResult.LoadJsonSummary(task_summary_json_path)
    # Fields copied over from the plain command result.
    copied_fields = {
        'cmd': command_result.cmd,
        'error': command_result.error,
        'output': command_result.output,
        'returncode': command_result.returncode,
    }
    return SwarmingCommandResult(task_summary_json=summary, **copied_fields)