diff options
Diffstat (limited to 'google/cloud/ml/v1/job_service.proto')
-rw-r--r-- | google/cloud/ml/v1/job_service.proto | 613 |
1 files changed, 613 insertions, 0 deletions
diff --git a/google/cloud/ml/v1/job_service.proto b/google/cloud/ml/v1/job_service.proto new file mode 100644 index 000000000..d1f608941 --- /dev/null +++ b/google/cloud/ml/v1/job_service.proto @@ -0,0 +1,613 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.ml.v1; + +import "google/api/annotations.proto"; +import "google/api/auth.proto"; +import "google/protobuf/empty.proto"; +import "google/protobuf/timestamp.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/ml/v1;ml"; +option java_multiple_files = true; +option java_outer_classname = "JobServiceProto"; +option java_package = "com.google.cloud.ml.api.v1"; + +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Proto file for the Google Cloud Machine Learning Engine. +// Describes the 'job service' to manage training and prediction jobs. + +// Service to create and manage training and batch prediction jobs. +service JobService { + // Creates a training or a batch prediction job. + rpc CreateJob(CreateJobRequest) returns (Job) { + option (google.api.http) = { + post: "/v1/{parent=projects/*}/jobs" + body: "job" + }; + } + + // Lists the jobs in the project. + rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) { + option (google.api.http) = { + get: "/v1/{parent=projects/*}/jobs" + }; + } + + // Describes a job. 
+ rpc GetJob(GetJobRequest) returns (Job) { + option (google.api.http) = { + get: "/v1/{name=projects/*/jobs/*}" + }; + } + + // Cancels a running job. + rpc CancelJob(CancelJobRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { + post: "/v1/{name=projects/*/jobs/*}:cancel" + body: "*" + }; + } +} + +// Represents input parameters for a training job. +message TrainingInput { + // A scale tier is an abstract representation of the resources Cloud ML + // will allocate to a training job. When selecting a scale tier for your + // training job, you should consider the size of your training dataset and + // the complexity of your model. As the tiers increase, virtual machines are + // added to handle your job, and the individual machines in the cluster + // generally have more memory and greater processing power than they do at + // lower tiers. The number of training units charged per hour of processing + // increases as tiers get more advanced. Refer to the + // [pricing guide](/ml/pricing) for more details. Note that in addition to + // incurring costs, your use of training resources is constrained by the + // [quota policy](/ml/quota). + enum ScaleTier { + // A single worker instance. This tier is suitable for learning how to use + // Cloud ML, and for experimenting with new models using small datasets. + BASIC = 0; + + // Many workers and a few parameter servers. + STANDARD_1 = 1; + + // A large number of workers with many parameter servers. + PREMIUM_1 = 3; + + // A single worker instance [with a GPU](/ml/docs/how-tos/using-gpus). + BASIC_GPU = 6; + + // The CUSTOM tier is not a set tier, but rather enables you to use your + // own cluster specification. When you use this tier, set values to + // configure your processing cluster according to these guidelines: + // + // * You _must_ set `TrainingInput.masterType` to specify the type + // of machine to use for your master node. This is the only required + // setting. 
+ // + // * You _may_ set `TrainingInput.workerCount` to specify the number of + // workers to use. If you specify one or more workers, you _must_ also + // set `TrainingInput.workerType` to specify the type of machine to use + // for your worker nodes. + // + // * You _may_ set `TrainingInput.parameterServerCount` to specify the + // number of parameter servers to use. If you specify one or more + // parameter servers, you _must_ also set + // `TrainingInput.parameterServerType` to specify the type of machine to + // use for your parameter servers. + // + // Note that all of your workers must use the same machine type, which can + // be different from your parameter server type and master type. Your + // parameter servers must likewise use the same machine type, which can be + // different from your worker type and master type. + CUSTOM = 5; + } + + // Required. Specifies the machine types, the number of replicas for workers + // and parameter servers. + ScaleTier scale_tier = 1; + + // Optional. Specifies the type of virtual machine to use for your training + // job's master worker. + // + // The following types are supported: + // + // <dl> + // <dt>standard</dt> + // <dd> + // A basic machine configuration suitable for training simple models with + // small to moderate datasets. + // </dd> + // <dt>large_model</dt> + // <dd> + // A machine with a lot of memory, specially suited for parameter servers + // when your model is large (having many hidden layers or layers with very + // large numbers of nodes). + // </dd> + // <dt>complex_model_s</dt> + // <dd> + // A machine suitable for the master and workers of the cluster when your + // model requires more computation than the standard machine can handle + // satisfactorily. + // </dd> + // <dt>complex_model_m</dt> + // <dd> + // A machine with roughly twice the number of cores and roughly double the + // memory of <code suppresswarning="true">complex_model_s</code>. 
+ // </dd> + // <dt>complex_model_l</dt> + // <dd> + // A machine with roughly twice the number of cores and roughly double the + // memory of <code suppresswarning="true">complex_model_m</code>. + // </dd> + // <dt>standard_gpu</dt> + // <dd> + // A machine equivalent to <code suppresswarning="true">standard</code> that + // also includes a + // <a href="/ml/docs/how-tos/using-gpus"> + // GPU that you can use in your trainer</a>. + // </dd> + // <dt>complex_model_m_gpu</dt> + // <dd> + // A machine equivalent to + // <code suppresswarning="true">complex_model_m</code> that also includes + // four GPUs. + // </dd> + // </dl> + // + // You must set this value when `scaleTier` is set to `CUSTOM`. + string master_type = 2; + + // Optional. Specifies the type of virtual machine to use for your training + // job's worker nodes. + // + // The supported values are the same as those described in the entry for + // `masterType`. + // + // This value must be present when `scaleTier` is set to `CUSTOM` and + // `workerCount` is greater than zero. + string worker_type = 3; + + // Optional. Specifies the type of virtual machine to use for your training + // job's parameter server. + // + // The supported values are the same as those described in the entry for + // `master_type`. + // + // This value must be present when `scaleTier` is set to `CUSTOM` and + // `parameter_server_count` is greater than zero. + string parameter_server_type = 4; + + // Optional. The number of worker replicas to use for the training job. Each + // replica in the cluster will be of the type specified in `worker_type`. + // + // This value can only be used when `scale_tier` is set to `CUSTOM`. If you + // set this value, you must also set `worker_type`. + int64 worker_count = 5; + + // Optional. The number of parameter server replicas to use for the training + // job. Each replica in the cluster will be of the type specified in + // `parameter_server_type`. 
+ // + // This value can only be used when `scale_tier` is set to `CUSTOM`. If you + // set this value, you must also set `parameter_server_type`. + int64 parameter_server_count = 6; + + // Required. The Google Cloud Storage location of the packages with + // the training program and any additional dependencies. + repeated string package_uris = 7; + + // Required. The Python module name to run after installing the packages. + string python_module = 8; + + // Optional. Command line arguments to pass to the program. + repeated string args = 10; + + // Optional. The set of Hyperparameters to tune. + HyperparameterSpec hyperparameters = 12; + + // Required. The Google Compute Engine region to run the training job in. + string region = 14; + + // Optional. A Google Cloud Storage path in which to store training outputs + // and other data needed for training. This path is passed to your TensorFlow + // program as the 'job_dir' command-line argument. The benefit of specifying + // this field is that Cloud ML validates the path for use in training. + string job_dir = 16; + + // Optional. The Google Cloud ML runtime version to use for training. If not + // set, Google Cloud ML will choose the latest stable version. + string runtime_version = 15; +} + +// Represents a set of hyperparameters to optimize. +message HyperparameterSpec { + // The available types of optimization goals. + enum GoalType { + // Goal Type will default to maximize. + GOAL_TYPE_UNSPECIFIED = 0; + + // Maximize the goal metric. + MAXIMIZE = 1; + + // Minimize the goal metric. + MINIMIZE = 2; + } + + // Required. The type of goal to use for tuning. Available types are + // `MAXIMIZE` and `MINIMIZE`. + // + // Defaults to `MAXIMIZE`. + GoalType goal = 1; + + // Required. The set of parameters to tune. + repeated ParameterSpec params = 2; + + // Optional. How many training trials should be attempted to optimize + // the specified hyperparameters. + // + // Defaults to one. 
+ int32 max_trials = 3; + + // Optional. The number of training trials to run concurrently. + // You can reduce the time it takes to perform hyperparameter tuning by adding + // trials in parallel. However, each trial only benefits from the information + // gained in completed trials. That means that a trial does not get access to + // the results of trials running at the same time, which could reduce the + // quality of the overall optimization. + // + // Each trial will use the same scale tier and machine types. + // + // Defaults to one. + int32 max_parallel_trials = 4; + + // Optional. The Tensorflow summary tag name to use for optimizing trials. For + // current versions of Tensorflow, this tag name should exactly match what is + // shown in Tensorboard, including all scopes. For versions of Tensorflow + // prior to 0.12, this should be only the tag passed to tf.Summary. + // By default, "training/hptuning/metric" will be used. + string hyperparameter_metric_tag = 5; +} + +// Represents a single hyperparameter to optimize. +message ParameterSpec { + // The type of the parameter. + enum ParameterType { + // You must specify a valid type. Using this unspecified type will result in + // an error. + PARAMETER_TYPE_UNSPECIFIED = 0; + + // Type for real-valued parameters. + DOUBLE = 1; + + // Type for integral parameters. + INTEGER = 2; + + // The parameter is categorical, with a value chosen from the categories + // field. + CATEGORICAL = 3; + + // The parameter is real valued, with a fixed set of feasible points. If + // `type==DISCRETE`, feasible_points must be provided, and + // {`min_value`, `max_value`} will be ignored. + DISCRETE = 4; + } + + // The type of scaling that should be applied to this parameter. + enum ScaleType { + // By default, no scaling is applied. + NONE = 0; + + // Scales the feasible space to (0, 1) linearly. + UNIT_LINEAR_SCALE = 1; + + // Scales the feasible space logarithmically to (0, 1). 
The entire feasible + // space must be strictly positive. + UNIT_LOG_SCALE = 2; + + // Scales the feasible space "reverse" logarithmically to (0, 1). The result + // is that values close to the top of the feasible space are spread out more + // than points near the bottom. The entire feasible space must be strictly + // positive. + UNIT_REVERSE_LOG_SCALE = 3; + } + + // Required. The parameter name must be unique amongst all ParameterSpecs in + // a HyperparameterSpec message. E.g., "learning_rate". + string parameter_name = 1; + + // Required. The type of the parameter. + ParameterType type = 4; + + // Required if type is `DOUBLE` or `INTEGER`. This field + // should be unset if type is `CATEGORICAL`. This value should be integers if + // type is `INTEGER`. + double min_value = 2; + + // Required if type is `DOUBLE` or `INTEGER`. This field + // should be unset if type is `CATEGORICAL`. This value should be integers if + // type is `INTEGER`. + double max_value = 3; + + // Required if type is `CATEGORICAL`. The list of possible categories. + repeated string categorical_values = 5; + + // Required if type is `DISCRETE`. + // A list of feasible points. + // The list should be in strictly increasing order. For instance, this + // parameter might have possible settings of 1.5, 2.5, and 4.0. This list + // should not contain more than 1,000 values. + repeated double discrete_values = 6; + + // Optional. How the parameter should be scaled to the hypercube. + // Leave unset for categorical parameters. + // Some kind of scaling is strongly recommended for real or integral + // parameters (e.g., `UNIT_LINEAR_SCALE`). + ScaleType scale_type = 7; +} + +// Represents the result of a single hyperparameter tuning trial from a +// training job. The TrainingOutput object that is returned on successful +// completion of a training job with hyperparameter tuning includes a list +// of HyperparameterOutput objects, one for each successful trial. 
+message HyperparameterOutput { + // An observed value of a metric. + message HyperparameterMetric { + // The global training step for this metric. + int64 training_step = 1; + + // The objective value at this training step. + double objective_value = 2; + } + + // The trial id for these results. + string trial_id = 1; + + // The hyperparameters given to this trial. + map<string, string> hyperparameters = 2; + + // The final objective metric seen for this trial. + HyperparameterMetric final_metric = 3; + + // All recorded objective metrics for this trial. + repeated HyperparameterMetric all_metrics = 4; +} + +// Represents results of a training job. Output only. +message TrainingOutput { + // The number of hyperparameter tuning trials that completed successfully. + // Only set for hyperparameter tuning jobs. + int64 completed_trial_count = 1; + + // Results for individual Hyperparameter trials. + // Only set for hyperparameter tuning jobs. + repeated HyperparameterOutput trials = 2; + + // The amount of ML units consumed by the job. + double consumed_ml_units = 3; + + // Whether this job is a hyperparameter tuning job. + bool is_hyperparameter_tuning_job = 4; +} + +// Represents input parameters for a prediction job. +message PredictionInput { + // The format used to separate data instances in the source files. + enum DataFormat { + // Unspecified format. + DATA_FORMAT_UNSPECIFIED = 0; + + // The source file is a text file with instances separated by the + // new-line character. + TEXT = 1; + + // The source file is a TFRecord file. + TF_RECORD = 2; + + // The source file is a GZIP-compressed TFRecord file. + TF_RECORD_GZIP = 3; + } + + // Required. The model or the version to use for prediction. + oneof model_version { + // Use this field if you want to use the default version for the specified + // model. 
The string must use the following format: + // + // `"projects/<var>[YOUR_PROJECT]</var>/models/<var>[YOUR_MODEL]</var>"` + string model_name = 1; + + // Use this field if you want to specify a version of the model to use. The + // string is formatted the same way as `model_name`, with the addition + // of the version information: + // + // `"projects/<var>[YOUR_PROJECT]</var>/models/<var>[YOUR_MODEL]</var>/versions/<var>[YOUR_VERSION]</var>"` + string version_name = 2; + + // Use this field if you want to specify a Google Cloud Storage path for + // the model to use. + string uri = 9; + } + + // Required. The format of the input data files. + DataFormat data_format = 3; + + // Required. The Google Cloud Storage location of the input data files. + // May contain wildcards. + repeated string input_paths = 4; + + // Required. The output Google Cloud Storage location. + string output_path = 5; + + // Optional. The maximum number of workers to be used for parallel processing. + // Defaults to 10 if not specified. + int64 max_worker_count = 6; + + // Required. The Google Compute Engine region to run the prediction job in. + string region = 7; + + // Optional. The Google Cloud ML runtime version to use for this batch + // prediction. If not set, Google Cloud ML will pick the runtime version used + // during the CreateVersion request for this model version, or choose the + // latest stable version when model version information is not available + // such as when the model is specified by uri. + string runtime_version = 8; +} + +// Represents results of a prediction job. +message PredictionOutput { + // The output Google Cloud Storage location provided at the job creation time. + string output_path = 1; + + // The number of generated predictions. + int64 prediction_count = 2; + + // The number of data instances which resulted in errors. + int64 error_count = 3; + + // Node hours used by the batch prediction job. 
+ double node_hours = 4; +} + +// Represents a training or prediction job. +message Job { + // Describes the job state. + enum State { + // The job state is unspecified. + STATE_UNSPECIFIED = 0; + + // The job has been just created and processing has not yet begun. + QUEUED = 1; + + // The service is preparing to run the job. + PREPARING = 2; + + // The job is in progress. + RUNNING = 3; + + // The job completed successfully. + SUCCEEDED = 4; + + // The job failed. + // `error_message` should contain the details of the failure. + FAILED = 5; + + // The job is being cancelled. + // `error_message` should describe the reason for the cancellation. + CANCELLING = 6; + + // The job has been cancelled. + // `error_message` should describe the reason for the cancellation. + CANCELLED = 7; + } + + // Required. The user-specified id of the job. + string job_id = 1; + + // Required. Parameters to create a job. + oneof input { + // Input parameters to create a training job. + TrainingInput training_input = 2; + + // Input parameters to create a prediction job. + PredictionInput prediction_input = 3; + } + + // Output only. When the job was created. + google.protobuf.Timestamp create_time = 4; + + // Output only. When the job processing was started. + google.protobuf.Timestamp start_time = 5; + + // Output only. When the job processing was completed. + google.protobuf.Timestamp end_time = 6; + + // Output only. The detailed state of a job. + State state = 7; + + // Output only. The details of a failure or a cancellation. + string error_message = 8; + + // Output only. The current result of the job. + oneof output { + // The current training job result. + TrainingOutput training_output = 9; + + // The current prediction job result. + PredictionOutput prediction_output = 10; + } +} + +// Request message for the CreateJob method. +message CreateJobRequest { + // Required. The project name. + // + // Authorization: requires `Editor` role on the specified project. 
+ string parent = 1; + + // Required. The job to create. + Job job = 2; +} + +// Request message for the ListJobs method. +message ListJobsRequest { + // Required. The name of the project for which to list jobs. + // + // Authorization: requires `Viewer` role on the specified project. + string parent = 1; + + // Optional. Specifies the subset of jobs to retrieve. + string filter = 2; + + // Optional. A page token to request the next page of results. + // + // You get the token from the `next_page_token` field of the response from + // the previous call. + string page_token = 4; + + // Optional. The number of jobs to retrieve per "page" of results. If there + // are more remaining results than this number, the response message will + // contain a valid value in the `next_page_token` field. + // + // The default value is 20, and the maximum page size is 100. + int32 page_size = 5; +} + +// Response message for the ListJobs method. +message ListJobsResponse { + // The list of jobs. + repeated Job jobs = 1; + + // Optional. Pass this token as the `page_token` field of the request for a + // subsequent call. + string next_page_token = 2; +} + +// Request message for the GetJob method. +message GetJobRequest { + // Required. The name of the job to get the description of. + // + // Authorization: requires `Viewer` role on the parent project. + string name = 1; +} + +// Request message for the CancelJob method. +message CancelJobRequest { + // Required. The name of the job to cancel. + // + // Authorization: requires `Editor` role on the parent project. + string name = 1; +} |