diff options
Diffstat (limited to 'google/genomics/v1alpha2/pipelines.proto')
-rw-r--r-- | google/genomics/v1alpha2/pipelines.proto | 632 |
1 files changed, 632 insertions, 0 deletions
diff --git a/google/genomics/v1alpha2/pipelines.proto b/google/genomics/v1alpha2/pipelines.proto new file mode 100644 index 000000000..51b92ce46 --- /dev/null +++ b/google/genomics/v1alpha2/pipelines.proto @@ -0,0 +1,632 @@ +// Copyright 2016 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.genomics.v1alpha2; + +import "google/api/annotations.proto"; +import "google/longrunning/operations.proto"; +import "google/protobuf/duration.proto"; +import "google/protobuf/empty.proto"; +import "google/protobuf/timestamp.proto"; +import "google/rpc/code.proto"; + +option cc_enable_arenas = true; +option go_package = "google.golang.org/genproto/googleapis/genomics/v1alpha2;genomics"; +option java_multiple_files = true; +option java_outer_classname = "PipelinesProto"; +option java_package = "com.google.genomics.v1a"; + +// A service for running genomics pipelines. +service PipelinesV1Alpha2 { + // Creates a pipeline that can be run later. Create takes a Pipeline that + // has all fields other than `pipelineId` populated, and then returns + // the same pipeline with `pipelineId` populated. This id can be used + // to run the pipeline. + // + // Caller must have WRITE permission to the project. + rpc CreatePipeline(CreatePipelineRequest) returns (Pipeline) { + option (google.api.http) = { + post: "/v1alpha2/pipelines" + body: "pipeline" + }; + } + + // Runs a pipeline. If `pipelineId` is specified in the request, then + // run a saved pipeline. If `ephemeralPipeline` is specified, then run + // that pipeline once without saving a copy. + // + // The caller must have READ permission to the project where the pipeline + // is stored and WRITE permission to the project where the pipeline will be + // run, as VMs will be created and storage will be used. + rpc RunPipeline(RunPipelineRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { + post: "/v1alpha2/pipelines:run" + body: "*" + }; + } + + // Retrieves a pipeline based on ID. + // + // Caller must have READ permission to the project. + rpc GetPipeline(GetPipelineRequest) returns (Pipeline) { + option (google.api.http) = { + get: "/v1alpha2/pipelines/{pipeline_id}" + }; + } + + // Lists pipelines. + // + // Caller must have READ permission to the project. + rpc ListPipelines(ListPipelinesRequest) returns (ListPipelinesResponse) { + option (google.api.http) = { + get: "/v1alpha2/pipelines" + }; + } + + // Deletes a pipeline based on ID. + // + // Caller must have WRITE permission to the project. + rpc DeletePipeline(DeletePipelineRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { + delete: "/v1alpha2/pipelines/{pipeline_id}" + }; + } + + // Gets controller configuration information. Should only be called + // by VMs created by the Pipelines Service and not by end users. + rpc GetControllerConfig(GetControllerConfigRequest) + returns (ControllerConfig) { + option (google.api.http) = { + get: "/v1alpha2/pipelines:getControllerConfig" + }; + } + + // Sets status of a given operation. Any new timestamps (as determined by + // description) are appended to TimestampEvents. Should only be called by VMs + // created by the Pipelines Service and not by end users. + rpc SetOperationStatus(SetOperationStatusRequest) + returns (google.protobuf.Empty) { + option (google.api.http) = { + put: "/v1alpha2/pipelines:setOperationStatus" + body: "*" + }; + } +} + +// Describes a Compute Engine resource that is being managed by a running +// [pipeline][google.genomics.v1alpha2.Pipeline]. +message ComputeEngine { + // The instance on which the operation is running. + string instance_name = 1; + + // The availability zone in which the instance resides. + string zone = 2; + + // The machine type of the instance. + string machine_type = 3; + + // The names of the disks that were created for this pipeline. + repeated string disk_names = 4; +} + +// Runtime metadata that will be populated in the +// [runtimeMetadata][google.genomics.v1.OperationMetadata.runtime_metadata] +// field of the Operation associated with a RunPipeline execution. +message RuntimeMetadata { + // Execution information specific to Google Compute Engine. + ComputeEngine compute_engine = 1; +} + +// The pipeline object. Represents a transformation from a set of input +// parameters to a set of output parameters. The transformation is defined +// as a docker image and command to run within that image. Each pipeline +// is run on a Google Compute Engine VM. A pipeline can be created with the +// `create` method and then later run with the `run` method, or a pipeline can +// be defined and run all at once with the `run` method. +message Pipeline { + // Required. The project in which to create the pipeline. The caller must have + // WRITE access. + string project_id = 1; + + // Required. A user specified pipeline name that does not have to be unique. + // This name can be used for filtering Pipelines in ListPipelines. + string name = 2; + + // User-specified description. + string description = 3; + + // Input parameters of the pipeline. + repeated PipelineParameter input_parameters = 8; + + // Output parameters of the pipeline. + repeated PipelineParameter output_parameters = 9; + + // Required. The executor indicates in which environment the pipeline runs. + oneof executor { + // Specifies the docker run information. + DockerExecutor docker = 5; + } + + // Required. Specifies resource requirements for the pipeline run. + // Required fields: + // + // * + // [minimumCpuCores][google.genomics.v1alpha2.PipelineResources.minimum_cpu_cores] + // + // * + // [minimumRamGb][google.genomics.v1alpha2.PipelineResources.minimum_ram_gb] + PipelineResources resources = 6; + + // Unique pipeline id that is generated by the service when CreatePipeline + // is called. Cannot be specified in the Pipeline used in the + // CreatePipelineRequest, and will be populated in the response to + // CreatePipeline and all subsequent Get and List calls. Indicates that the + // service has registered this pipeline. + string pipeline_id = 7; +} + +// The request to create a pipeline. The pipeline field here should not have +// `pipelineId` populated, as that will be populated by the server. +message CreatePipelineRequest { + // The pipeline to create. Should not have `pipelineId` populated. + Pipeline pipeline = 1; +} + +// The pipeline run arguments. +message RunPipelineArgs { + // Required. The project in which to run the pipeline. The caller must have + // WRITER access to all Google Cloud services and resources (e.g. Google + // Compute Engine) will be used. + string project_id = 1; + + // Pipeline input arguments; keys are defined in the pipeline documentation. + // All input parameters that do not have default values must be specified. + // If parameters with defaults are specified here, the defaults will be + // overridden. + map<string, string> inputs = 2; + + // Pipeline output arguments; keys are defined in the pipeline + // documentation. All output parameters of without default values + // must be specified. If parameters with defaults are specified + // here, the defaults will be overridden. + map<string, string> outputs = 3; + + // The Google Cloud Service Account that will be used to access data and + // services. By default, the compute service account associated with + // `projectId` is used. + ServiceAccount service_account = 4; + + // This field is deprecated. Use `labels` instead. Client-specified pipeline + // operation identifier. + string client_id = 5; + + // Specifies resource requirements/overrides for the pipeline run. + PipelineResources resources = 6; + + // Required. Logging options. Used by the service to communicate results + // to the user. + LoggingOptions logging = 7; + + // How long to keep the VM up after a failure (for example docker command + // failed, copying input or output files failed, etc). While the VM is up, one + // can ssh into the VM to debug. Default is 0; maximum allowed value is 1 day. + google.protobuf.Duration keep_vm_alive_on_failure_duration = 8; + + // Labels to apply to this pipeline run. Labels will also be applied to + // compute resources (VM, disks) created by this pipeline run. When listing + // operations, operations can [filtered by labels] + // [google.longrunning.ListOperationsRequest.filter]. + // Label keys may not be empty; label values may be empty. Non-empty labels + // must be 1-63 characters long, and comply with [RFC1035] + // (https://www.ietf.org/rfc/rfc1035.txt). + // Specifically, the name must be 1-63 characters long and match the regular + // expression `[a-z]([-a-z0-9]*[a-z0-9])?` which means the first + // character must be a lowercase letter, and all following characters must be + // a dash, lowercase letter, or digit, except the last character, which cannot + // be a dash. + map<string, string> labels = 9; +} + +// The request to run a pipeline. If `pipelineId` is specified, it +// refers to a saved pipeline created with CreatePipeline and set as +// the `pipelineId` of the returned Pipeline object. If +// `ephemeralPipeline` is specified, that pipeline is run once +// with the given args and not saved. It is an error to specify both +// `pipelineId` and `ephemeralPipeline`. `pipelineArgs` +// must be specified. +message RunPipelineRequest { + oneof pipeline { + // The already created pipeline to run. + string pipeline_id = 1; + + // A new pipeline object to run once and then delete. + Pipeline ephemeral_pipeline = 2; + } + + // The arguments to use when running this pipeline. + RunPipelineArgs pipeline_args = 3; +} + +// A request to get a saved pipeline by id. +message GetPipelineRequest { + // Caller must have READ access to the project in which this pipeline + // is defined. + string pipeline_id = 1; +} + +// A request to list pipelines in a given project. Pipelines can be +// filtered by name using `namePrefix`: all pipelines with names that +// begin with `namePrefix` will be returned. Uses standard pagination: +// `pageSize` indicates how many pipelines to return, and +// `pageToken` comes from a previous ListPipelinesResponse to +// indicate offset. +message ListPipelinesRequest { + // Required. The name of the project to search for pipelines. Caller + // must have READ access to this project. + string project_id = 1; + + // Pipelines with names that match this prefix should be + // returned. If unspecified, all pipelines in the project, up to + // `pageSize`, will be returned. + string name_prefix = 2; + + // Number of pipelines to return at once. Defaults to 256, and max + // is 2048. + int32 page_size = 3; + + // Token to use to indicate where to start getting results. + // If unspecified, returns the first page of results. + string page_token = 4; +} + +// The response of ListPipelines. Contains at most `pageSize` +// pipelines. If it contains `pageSize` pipelines, and more pipelines +// exist, then `nextPageToken` will be populated and should be +// used as the `pageToken` argument to a subsequent ListPipelines +// request. +message ListPipelinesResponse { + // The matched pipelines. + repeated Pipeline pipelines = 1; + + // The token to use to get the next page of results. + string next_page_token = 2; +} + +// The request to delete a saved pipeline by ID. +message DeletePipelineRequest { + // Caller must have WRITE access to the project in which this pipeline + // is defined. + string pipeline_id = 1; +} + +// Request to get controller configuation. Should only be used +// by VMs created by the Pipelines Service and not by end users. +message GetControllerConfigRequest { + // The operation to retrieve controller configuration for. + string operation_id = 1; + + uint64 validation_token = 2; +} + +// Stores the information that the controller will fetch from the +// server in order to run. Should only be used by VMs created by the +// Pipelines Service and not by end users. +message ControllerConfig { + message RepeatedString { + repeated string values = 1; + } + + string image = 1; + + string cmd = 2; + + string gcs_log_path = 3; + + string machine_type = 4; + + map<string, string> vars = 5; + + map<string, string> disks = 6; + + map<string, RepeatedString> gcs_sources = 7; + + map<string, RepeatedString> gcs_sinks = 8; +} + +// Stores the list of events and times they occured for major events in job +// execution. +message TimestampEvent { + // String indicating the type of event + string description = 1; + + // The time this event occured. + google.protobuf.Timestamp timestamp = 2; +} + +// Request to set operation status. Should only be used by VMs +// created by the Pipelines Service and not by end users. +message SetOperationStatusRequest { + string operation_id = 1; + + repeated TimestampEvent timestamp_events = 2; + + google.rpc.Code error_code = 3; + + string error_message = 4; + + uint64 validation_token = 5; +} + +// A Google Cloud Service Account. +message ServiceAccount { + // Email address of the service account. Defaults to `default`, + // which uses the compute service account associated with the project. + string email = 1; + + // List of scopes to be enabled for this service account on the VM. + // The following scopes are automatically included: + // + // * https://www.googleapis.com/auth/compute + // * https://www.googleapis.com/auth/devstorage.full_control + // * https://www.googleapis.com/auth/genomics + // * https://www.googleapis.com/auth/logging.write + // * https://www.googleapis.com/auth/monitoring.write + repeated string scopes = 2; +} + +// The logging options for the pipeline run. +message LoggingOptions { + // The location in Google Cloud Storage to which the pipeline logs + // will be copied. Can be specified as a fully qualified directory + // path, in which case logs will be output with a unique identifier + // as the filename in that directory, or as a fully specified path, + // which must end in `.log`, in which case that path will be + // used, and the user must ensure that logs are not + // overwritten. Stdout and stderr logs from the run are also + // generated and output as `-stdout.log` and `-stderr.log`. + string gcs_path = 1; +} + +// The system resources for the pipeline run. +message PipelineResources { + // A Google Compute Engine disk resource specification. + message Disk { + // The types of disks that may be attached to VMs. + enum Type { + // Default disk type. Use one of the other options below. + TYPE_UNSPECIFIED = 0; + + // Specifies a Google Compute Engine persistent hard disk. See + // https://cloud.google.com/compute/docs/disks/#pdspecs for details. + PERSISTENT_HDD = 1; + + // Specifies a Google Compute Engine persistent solid-state disk. See + // https://cloud.google.com/compute/docs/disks/#pdspecs for details. + PERSISTENT_SSD = 2; + + // Specifies a Google Compute Engine local SSD. + // See https://cloud.google.com/compute/docs/disks/local-ssd for details. + LOCAL_SSD = 3; + } + + // Required. The name of the disk that can be used in the pipeline + // parameters. Must be 1 - 63 characters. + // The name "boot" is reserved for system use. + string name = 1; + + // Required. The type of the disk to create. + Type type = 2; + + // The size of the disk. Defaults to 500 (GB). + // This field is not applicable for local SSD. + int32 size_gb = 3; + + // The full or partial URL of the persistent disk to attach. See + // https://cloud.google.com/compute/docs/reference/latest/instances#resource + // and + // https://cloud.google.com/compute/docs/disks/persistent-disks#snapshots + // for more details. + string source = 4; + + // Deprecated. Disks created by the Pipelines API will be deleted at the end + // of the pipeline run, regardless of what this field is set to. + bool auto_delete = 6; + + // Required at create time and cannot be overridden at run time. + // Specifies the path in the docker container where files on + // this disk should be located. For example, if `mountPoint` + // is `/mnt/disk`, and the parameter has `localPath` + // `inputs/file.txt`, the docker container can access the data at + // `/mnt/disk/inputs/file.txt`. + string mount_point = 8; + } + + // The minimum number of cores to use. Defaults to 1. + int32 minimum_cpu_cores = 1; + + // Whether to use preemptible VMs. Defaults to `false`. In order to use this, + // must be true for both create time and run time. Cannot be true at run time + // if false at create time. + bool preemptible = 2; + + // The minimum amount of RAM to use. Defaults to 3.75 (GB) + double minimum_ram_gb = 3; + + // Disks to attach. + repeated Disk disks = 4; + + // List of Google Compute Engine availability zones to which resource + // creation will restricted. If empty, any zone may be chosen. + repeated string zones = 5; + + // The size of the boot disk. Defaults to 10 (GB). + int32 boot_disk_size_gb = 6; + + // Whether to assign an external IP to the instance. This is an experimental + // feature that may go away. Defaults to false. + // Corresponds to `--no_address` flag for [gcloud compute instances create] + // (https://cloud.google.com/sdk/gcloud/reference/compute/instances/create). + // In order to use this, must be true for both create time and run time. + // Cannot be true at run time if false at create time. If you need to ssh into + // a private IP VM for debugging, you can ssh to a public VM and then ssh into + // the private VM's Internal IP. If noAddress is set, this pipeline run may + // only load docker images from Google Container Registry and not Docker Hub. + // ** Note: To use this option, your project must be in Google Access for + // Private IPs Early Access Program.** + bool no_address = 7; +} + +// Parameters facilitate setting and delivering data into the +// pipeline's execution environment. They are defined at create time, +// with optional defaults, and can be overridden at run time. +// +// If `localCopy` is unset, then the parameter specifies a string that +// is passed as-is into the pipeline, as the value of the environment +// variable with the given name. A default value can be optionally +// specified at create time. The default can be overridden at run time +// using the inputs map. If no default is given, a value must be +// supplied at runtime. +// +// If `localCopy` is defined, then the parameter specifies a data +// source or sink, both in Google Cloud Storage and on the Docker container +// where the pipeline computation is run. The [service account associated with +// the Pipeline][google.genomics.v1alpha2.RunPipelineArgs.service_account] (by +// default the project's Compute Engine service account) must have access to the +// Google Cloud Storage paths. +// +// At run time, the Google Cloud Storage paths can be overridden if a default +// was provided at create time, or must be set otherwise. The pipeline runner +// should add a key/value pair to either the inputs or outputs map. The +// indicated data copies will be carried out before/after pipeline execution, +// just as if the corresponding arguments were provided to `gsutil cp`. +// +// For example: Given the following `PipelineParameter`, specified +// in the `inputParameters` list: +// +// ``` +// {name: "input_file", localCopy: {path: "file.txt", disk: "pd1"}} +// ``` +// +// where `disk` is defined in the `PipelineResources` object as: +// +// ``` +// {name: "pd1", mountPoint: "/mnt/disk/"} +// ``` +// +// We create a disk named `pd1`, mount it on the host VM, and map +// `/mnt/pd1` to `/mnt/disk` in the docker container. At +// runtime, an entry for `input_file` would be required in the inputs +// map, such as: +// +// ``` +// inputs["input_file"] = "gs://my-bucket/bar.txt" +// ``` +// +// This would generate the following gsutil call: +// +// ``` +// gsutil cp gs://my-bucket/bar.txt /mnt/pd1/file.txt +// ``` +// +// The file `/mnt/pd1/file.txt` maps to `/mnt/disk/file.txt` in the +// Docker container. Acceptable paths are: +// +// <table> +// <thead> +// <tr><th>Google Cloud storage path</th><th>Local path</th></tr> +// </thead> +// <tbody> +// <tr><td>file</td><td>file</td></tr> +// <tr><td>glob</td><td>directory</td></tr> +// </tbody> +// </table> +// +// For outputs, the direction of the copy is reversed: +// +// ``` +// gsutil cp /mnt/disk/file.txt gs://my-bucket/bar.txt +// ``` +// +// Acceptable paths are: +// +// <table> +// <thead> +// <tr><th>Local path</th><th>Google Cloud Storage path</th></tr> +// </thead> +// <tbody> +// <tr><td>file</td><td>file</td></tr> +// <tr> +// <td>file</td> +// <td>directory - directory must already exist</td> +// </tr> +// <tr> +// <td>glob</td> +// <td>directory - directory will be created if it doesn't exist</td></tr> +// </tbody> +// </table> +// +// One restriction due to docker limitations, is that for outputs that are found +// on the boot disk, the local path cannot be a glob and must be a file. +message PipelineParameter { + // LocalCopy defines how a remote file should be copied to and from the VM. + message LocalCopy { + // Required. The path within the user's docker container where + // this input should be localized to and from, relative to the specified + // disk's mount point. For example: file.txt, + string path = 1; + + // Required. The name of the disk where this parameter is + // located. Can be the name of one of the disks specified in the + // Resources field, or "boot", which represents the Docker + // instance's boot disk and has a mount point of `/`. + string disk = 2; + } + + // Required. Name of the parameter - the pipeline runner uses this string + // as the key to the input and output maps in RunPipeline. + string name = 1; + + // Human-readable description. + string description = 2; + + // The default value for this parameter. Can be overridden at runtime. + // If `localCopy` is present, then this must be a Google Cloud Storage path + // beginning with `gs://`. + string default_value = 5; + + // If present, this parameter is marked for copying to and from the VM. + // `LocalCopy` indicates where on the VM the file should be. The value + // given to this parameter (either at runtime or using `defaultValue`) + // must be the remote path where the file should be. + LocalCopy local_copy = 6; +} + +// The Docker execuctor specification. +message DockerExecutor { + // Required. Image name from either Docker Hub or Google Container Registry. + // Users that run pipelines must have READ access to the image. + string image_name = 1; + + // Required. The command or newline delimited script to run. The command + // string will be executed within a bash shell. + // + // If the command exits with a non-zero exit code, output parameter + // de-localization will be skipped and the pipeline operation's + // [`error`][google.longrunning.Operation.error] field will be populated. + // + // Maximum command string length is 16384. + string cmd = 2; +} |