diff options
Diffstat (limited to 'google/cloud/videointelligence/v1p3beta1/video_intelligence.proto')
-rw-r--r-- | google/cloud/videointelligence/v1p3beta1/video_intelligence.proto | 627 |
1 files changed, 627 insertions, 0 deletions
diff --git a/google/cloud/videointelligence/v1p3beta1/video_intelligence.proto b/google/cloud/videointelligence/v1p3beta1/video_intelligence.proto new file mode 100644 index 000000000..dc65a651e --- /dev/null +++ b/google/cloud/videointelligence/v1p3beta1/video_intelligence.proto @@ -0,0 +1,627 @@ +// Copyright 2018 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.videointelligence.v1p3beta1; + +import "google/api/annotations.proto"; +import "google/longrunning/operations.proto"; +import "google/protobuf/duration.proto"; +import "google/protobuf/timestamp.proto"; +import "google/rpc/status.proto"; + +option csharp_namespace = "Google.Cloud.VideoIntelligence.V1P3Beta1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/videointelligence/v1p3beta1;videointelligence"; +option java_multiple_files = true; +option java_outer_classname = "VideoIntelligenceServiceProto"; +option java_package = "com.google.cloud.videointelligence.v1p3beta1"; +option php_namespace = "Google\\Cloud\\VideoIntelligence\\V1p3beta1"; + +// Service that implements Google Cloud Video Intelligence API. +service VideoIntelligenceService { + // Performs asynchronous video annotation. Progress and results can be + // retrieved through the `google.longrunning.Operations` interface. + // `Operation.metadata` contains `AnnotateVideoProgress` (progress). + // `Operation.response` contains `AnnotateVideoResponse` (results). + rpc AnnotateVideo(AnnotateVideoRequest) + returns (google.longrunning.Operation) { + option (google.api.http) = { + post: "/v1p3beta1/videos:annotate" + body: "*" + }; + } +} + +// Service that implements Google Cloud Video Intelligence Streaming API. +service StreamingVideoIntelligenceService { + // Performs video annotation with bidirectional streaming: emitting results + // while sending video/audio bytes. + // This method is only available via the gRPC API (not REST). + rpc StreamingAnnotateVideo(stream StreamingAnnotateVideoRequest) + returns (stream StreamingAnnotateVideoResponse); +} + +// Video annotation request. +message AnnotateVideoRequest { + // Input video location. Currently, only + // [Google Cloud Storage](https://cloud.google.com/storage/) URIs are + // supported, which must be specified in the following format: + // `gs://bucket-id/object-id` (other URI formats return + // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For + // more information, see [Request URIs](/storage/docs/reference-uris). A video + // URI may include wildcards in `object-id`, and thus identify multiple + // videos. Supported wildcards: '*' to match 0 or more characters; + // '?' to match 1 character. If unset, the input video should be embedded + // in the request as `input_content`. If set, `input_content` should be unset. + string input_uri = 1; + + // The video data bytes. + // If unset, the input video(s) should be specified via `input_uri`. + // If set, `input_uri` should be unset. + bytes input_content = 6; + + // Requested video annotation features. + repeated Feature features = 2; + + // Additional video context and/or feature-specific parameters. + VideoContext video_context = 3; + + // Optional location where the output (in JSON format) should be stored. + // Currently, only [Google Cloud Storage](https://cloud.google.com/storage/) + // URIs are supported, which must be specified in the following format: + // `gs://bucket-id/object-id` (other URI formats return + // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For + // more information, see [Request URIs](/storage/docs/reference-uris). + string output_uri = 4; + + // Optional cloud region where annotation should take place. Supported cloud + // regions: `us-east1`, `us-west1`, `europe-west1`, `asia-east1`. If no region + // is specified, a region will be determined based on video file location. + string location_id = 5; +} + +// Video context and/or feature-specific parameters. +message VideoContext { + // Video segments to annotate. The segments may overlap and are not required + // to be contiguous or span the whole video. If unspecified, each video is + // treated as a single segment. + repeated VideoSegment segments = 1; + + // Config for LABEL_DETECTION. + LabelDetectionConfig label_detection_config = 2; + + // Config for SHOT_CHANGE_DETECTION. + ShotChangeDetectionConfig shot_change_detection_config = 3; + + // Config for EXPLICIT_CONTENT_DETECTION. + ExplicitContentDetectionConfig explicit_content_detection_config = 4; + + // Config for TEXT_DETECTION. + TextDetectionConfig text_detection_config = 8; +} + +// Config for LABEL_DETECTION. +message LabelDetectionConfig { + // What labels should be detected with LABEL_DETECTION, in addition to + // video-level labels or segment-level labels. + // If unspecified, defaults to `SHOT_MODE`. + LabelDetectionMode label_detection_mode = 1; + + // Whether the video has been shot from a stationary (i.e. non-moving) camera. + // When set to true, might improve detection accuracy for moving objects. + // Should be used with `SHOT_AND_FRAME_MODE` enabled. + bool stationary_camera = 2; + + // Model to use for label detection. + // Supported values: "builtin/stable" (the default if unset) and + // "builtin/latest". + string model = 3; +} + +// Config for SHOT_CHANGE_DETECTION. +message ShotChangeDetectionConfig { + // Model to use for shot change detection. + // Supported values: "builtin/stable" (the default if unset) and + // "builtin/latest". + string model = 1; +} + +// Config for EXPLICIT_CONTENT_DETECTION. +message ExplicitContentDetectionConfig { + // Model to use for explicit content detection. + // Supported values: "builtin/stable" (the default if unset) and + // "builtin/latest". + string model = 1; +} + +// Config for TEXT_DETECTION. +message TextDetectionConfig { + // Language hint can be specified if the language to be detected is known a + // priori. It can increase the accuracy of the detection. Language hint must + // be language code in BCP-47 format. + // + // Automatic language detection is performed if no hint is provided. + repeated string language_hints = 1; +} + +// Video segment. +message VideoSegment { + // Time-offset, relative to the beginning of the video, + // corresponding to the start of the segment (inclusive). + google.protobuf.Duration start_time_offset = 1; + + // Time-offset, relative to the beginning of the video, + // corresponding to the end of the segment (inclusive). + google.protobuf.Duration end_time_offset = 2; +} + +// Video segment level annotation results for label detection. +message LabelSegment { + // Video segment where a label was detected. + VideoSegment segment = 1; + + // Confidence that the label is accurate. Range: [0, 1]. + float confidence = 2; +} + +// Video frame level annotation results for label detection. +message LabelFrame { + // Time-offset, relative to the beginning of the video, corresponding to the + // video frame for this location. + google.protobuf.Duration time_offset = 1; + + // Confidence that the label is accurate. Range: [0, 1]. + float confidence = 2; +} + +// Detected entity from video analysis. +message Entity { + // Opaque entity ID. Some IDs may be available in + // [Google Knowledge Graph Search + // API](https://developers.google.com/knowledge-graph/). + string entity_id = 1; + + // Textual description, e.g. `Fixed-gear bicycle`. + string description = 2; + + // Language code for `description` in BCP-47 format. + string language_code = 3; +} + +// Label annotation. +message LabelAnnotation { + // Detected entity. + Entity entity = 1; + + // Common categories for the detected entity. + // E.g. when the label is `Terrier` the category is likely `dog`. And in some + // cases there might be more than one categories e.g. `Terrier` could also be + // a `pet`. + repeated Entity category_entities = 2; + + // All video segments where a label was detected. + repeated LabelSegment segments = 3; + + // All video frames where a label was detected. + repeated LabelFrame frames = 4; +} + +// Video frame level annotation results for explicit content. +message ExplicitContentFrame { + // Time-offset, relative to the beginning of the video, corresponding to the + // video frame for this location. + google.protobuf.Duration time_offset = 1; + + // Likelihood of the pornography content.. + Likelihood pornography_likelihood = 2; +} + +// Explicit content annotation (based on per-frame visual signals only). +// If no explicit content has been detected in a frame, no annotations are +// present for that frame. +message ExplicitContentAnnotation { + // All video frames where explicit content was detected. + repeated ExplicitContentFrame frames = 1; +} + +// Normalized bounding box. +// The normalized vertex coordinates are relative to the original image. +// Range: [0, 1]. +message NormalizedBoundingBox { + // Left X coordinate. + float left = 1; + + // Top Y coordinate. + float top = 2; + + // Right X coordinate. + float right = 3; + + // Bottom Y coordinate. + float bottom = 4; +} + +// Annotation results for a single video. +message VideoAnnotationResults { + // Video file location in + // [Google Cloud Storage](https://cloud.google.com/storage/). + string input_uri = 1; + + // Label annotations on video level or user specified segment level. + // There is exactly one element for each unique label. + repeated LabelAnnotation segment_label_annotations = 2; + + // Label annotations on shot level. + // There is exactly one element for each unique label. + repeated LabelAnnotation shot_label_annotations = 3; + + // Label annotations on frame level. + // There is exactly one element for each unique label. + repeated LabelAnnotation frame_label_annotations = 4; + + // Shot annotations. Each shot is represented as a video segment. + repeated VideoSegment shot_annotations = 6; + + // Explicit content annotation. + ExplicitContentAnnotation explicit_annotation = 7; + + // OCR text detection and tracking. + // Annotations for list of detected text snippets. Each will have list of + // frame information associated with it. + repeated TextAnnotation text_annotations = 12; + + // Annotations for list of objects detected and tracked in video. + repeated ObjectTrackingAnnotation object_annotations = 14; + + // If set, indicates an error. Note that for a single `AnnotateVideoRequest` + // some videos may succeed and some may fail. + google.rpc.Status error = 9; +} + +// Video annotation response. Included in the `response` +// field of the `Operation` returned by the `GetOperation` +// call of the `google::longrunning::Operations` service. +message AnnotateVideoResponse { + // Annotation results for all videos specified in `AnnotateVideoRequest`. + repeated VideoAnnotationResults annotation_results = 1; +} + +// Annotation progress for a single video. +message VideoAnnotationProgress { + // Video file location in + // [Google Cloud Storage](https://cloud.google.com/storage/). + string input_uri = 1; + + // Approximate percentage processed thus far. Guaranteed to be + // 100 when fully processed. + int32 progress_percent = 2; + + // Time when the request was received. + google.protobuf.Timestamp start_time = 3; + + // Time of the most recent update. + google.protobuf.Timestamp update_time = 4; +} + +// Video annotation progress. Included in the `metadata` +// field of the `Operation` returned by the `GetOperation` +// call of the `google::longrunning::Operations` service. +message AnnotateVideoProgress { + // Progress metadata for all videos specified in `AnnotateVideoRequest`. + repeated VideoAnnotationProgress annotation_progress = 1; +} + +// A vertex represents a 2D point in the image. +// NOTE: the normalized vertex coordinates are relative to the original image +// and range from 0 to 1. +message NormalizedVertex { + // X coordinate. + float x = 1; + + // Y coordinate. + float y = 2; +} + +// Normalized bounding polygon for text (that might not be aligned with axis). +// Contains list of the corner points in clockwise order starting from +// top-left corner. For example, for a rectangular bounding box: +// When the text is horizontal it might look like: +// 0----1 +// | | +// 3----2 +// +// When it's clockwise rotated 180 degrees around the top-left corner it +// becomes: +// 2----3 +// | | +// 1----0 +// +// and the vertex order will still be (0, 1, 2, 3). Note that values can be less +// than 0, or greater than 1 due to trignometric calculations for location of +// the box. +message NormalizedBoundingPoly { + // Normalized vertices of the bounding polygon. + repeated NormalizedVertex vertices = 1; +} + +// Video segment level annotation results for text detection. +message TextSegment { + // Video segment where a text snippet was detected. + VideoSegment segment = 1; + + // Confidence for the track of detected text. It is calculated as the highest + // over all frames where OCR detected text appears. + float confidence = 2; + + // Information related to the frames where OCR detected text appears. + repeated TextFrame frames = 3; +} + +// Video frame level annotation results for text annotation (OCR). +// Contains information regarding timestamp and bounding box locations for the +// frames containing detected OCR text snippets. +message TextFrame { + // Bounding polygon of the detected text for this frame. + NormalizedBoundingPoly rotated_bounding_box = 1; + + // Timestamp of this frame. + google.protobuf.Duration time_offset = 2; +} + +// Annotations related to one detected OCR text snippet. This will contain the +// corresponding text, confidence value, and frame level information for each +// detection. +message TextAnnotation { + // The detected text. + string text = 1; + + // All video segments where OCR detected text appears. + repeated TextSegment segments = 2; +} + +// Video frame level annotations for object detection and tracking. This field +// stores per frame location, time offset, and confidence. +message ObjectTrackingFrame { + // The normalized bounding box location of this object track for the frame. + NormalizedBoundingBox normalized_bounding_box = 1; + + // The timestamp of the frame in microseconds. + google.protobuf.Duration time_offset = 2; +} + +// Annotations corresponding to one tracked object. +message ObjectTrackingAnnotation { + // Entity to specify the object category that this track is labeled as. + Entity entity = 1; + + // Object category's labeling confidence of this track. + float confidence = 4; + + // Information corresponding to all frames where this object track appears. + // Non-streaming batch mode: it may be one or multiple ObjectTrackingFrame + // messages in frames. + // Streaming mode: it can only be one ObjectTrackingFrame message in frames. + repeated ObjectTrackingFrame frames = 2; + + // Different representation of tracking info in non-streaming batch + // and streaming modes. + oneof track_info { + // Non-streaming batch mode ONLY. + // Each object track corresponds to one video segment where it appears. + VideoSegment segment = 3; + // Streaming mode ONLY. + // In streaming mode, we do not know the end time of a tracked object + // before it is completed. Hence, there is no VideoSegment info returned. + // Instead, we provide a unique identifiable integer track_id so that + // the customers can correlate the results of the ongoing + // ObjectTrackAnnotation of the same track_id over time. + int64 track_id = 5; + } +} + +// The top-level message sent by the client for the `StreamingAnnotateVideo` +// method. Multiple `StreamingAnnotateVideoRequest` messages are sent. +// The first message must only contain a `StreamingVideoConfig` message. +// All subsequent messages must only contain `input_content` data. +message StreamingAnnotateVideoRequest { + // *Required* The streaming request, which is either a streaming config or + // video content. + oneof streaming_request { + // Provides information to the annotator, specifing how to process the + // request. The first `AnnotateStreamingVideoRequest` message must only + // contain a `video_config` message. + StreamingVideoConfig video_config = 1; + + // The video data to be annotated. Chunks of video data are sequentially + // sent in `StreamingAnnotateVideoRequest` messages. Except the initial + // `StreamingAnnotateVideoRequest` message containing only + // `video_config`, all subsequent `AnnotateStreamingVideoRequest` + // messages must only contain `input_content` field. + bytes input_content = 2; + } +} + +// `StreamingAnnotateVideoResponse` is the only message returned to the client +// by `StreamingAnnotateVideo`. A series of zero or more +// `StreamingAnnotateVideoResponse` messages are streamed back to the client. +message StreamingAnnotateVideoResponse { + // If set, returns a [google.rpc.Status][] message that + // specifies the error for the operation. + google.rpc.Status error = 1; + + // Streaming annotation results. + StreamingVideoAnnotationResults annotation_results = 2; + + // GCS URI that stores annotation results of one streaming session. + // It is a directory that can hold multiple files in JSON format. + // Example uri format: + // gs://bucket_id/object_id/cloud_project_name-session_id + string annotation_results_uri = 3; +} + +// Config for EXPLICIT_CONTENT_DETECTION in streaming mode. +message StreamingExplicitContentDetectionConfig { + // No customized config support. +} + +// Config for LABEL_DETECTION in streaming mode. +message StreamingLabelDetectionConfig { + // Whether the video has been captured from a stationary (i.e. non-moving) + // camera. When set to true, might improve detection accuracy for moving + // objects. Default: false. + bool stationary_camera = 1; +} + +// Config for STREAMING_OBJECT_TRACKING. +message StreamingObjectTrackingConfig { + // No customized config support. +} + +// Config for SHOT_CHANGE_DETECTION in streaming mode. +message StreamingShotChangeDetectionConfig { + // No customized config support. +} + +// Config for streaming storage option. +message StreamingStorageConfig { + // Enable streaming storage. Default: false. + bool enable_storage_annotation_result = 1; + + // GCS URI to store all annotation results for one client. Client should + // specify this field as the top-level storage directory. Annotation results + // of different sessions will be put into different sub-directories denoted + // by project_name and session_id. All sub-directories will be auto generated + // by program and will be made accessible to client in response proto. + // URIs must be specified in the following format: `gs://bucket-id/object-id` + // `bucket-id` should be a valid GCS bucket created by client and bucket + // permission shall also be configured properly. `object-id` can be arbitrary + // string that make sense to client. Other URI formats will return error and + // cause GCS write failure. + string annotation_result_storage_directory = 3; +} + +// Streaming annotation results corresponding to a portion of the video +// that is currently being processed. +message StreamingVideoAnnotationResults { + // Shot annotation results. Each shot is represented as a video segment. + repeated VideoSegment shot_annotations = 1; + + // Label annotation results. + repeated LabelAnnotation label_annotations = 2; + + // Explicit content detection results. + ExplicitContentAnnotation explicit_annotation = 3; + + // Object tracking results. + repeated ObjectTrackingAnnotation object_annotations = 4; +} + +// Provides information to the annotator that specifies how to process the +// request. +message StreamingVideoConfig { + // Requested annotation feature. + StreamingFeature feature = 1; + + // Config for requested annotation feature. + oneof streaming_config { + // Config for SHOT_CHANGE_DETECTION. + StreamingShotChangeDetectionConfig shot_change_detection_config = 2; + + // Config for LABEL_DETECTION. + StreamingLabelDetectionConfig label_detection_config = 3; + + // Config for STREAMING_EXPLICIT_CONTENT_DETECTION. + StreamingExplicitContentDetectionConfig explicit_content_detection_config = + 4; + + // Config for STREAMING_OBJECT_TRACKING. + StreamingObjectTrackingConfig object_tracking_config = 5; + } + + // Streaming storage option. By default: storage is disabled. + StreamingStorageConfig storage_config = 30; +} + +// Video annotation feature. +enum Feature { + // Unspecified. + FEATURE_UNSPECIFIED = 0; + + // Label detection. Detect objects, such as dog or flower. + LABEL_DETECTION = 1; + + // Shot change detection. + SHOT_CHANGE_DETECTION = 2; + + // Explicit content detection. + EXPLICIT_CONTENT_DETECTION = 3; + + // OCR text detection and tracking. + TEXT_DETECTION = 7; + + // Object detection and tracking. + OBJECT_TRACKING = 9; +} + +// Label detection mode. +enum LabelDetectionMode { + // Unspecified. + LABEL_DETECTION_MODE_UNSPECIFIED = 0; + + // Detect shot-level labels. + SHOT_MODE = 1; + + // Detect frame-level labels. + FRAME_MODE = 2; + + // Detect both shot-level and frame-level labels. + SHOT_AND_FRAME_MODE = 3; +} + +// Bucketized representation of likelihood. +enum Likelihood { + // Unspecified likelihood. + LIKELIHOOD_UNSPECIFIED = 0; + + // Very unlikely. + VERY_UNLIKELY = 1; + + // Unlikely. + UNLIKELY = 2; + + // Possible. + POSSIBLE = 3; + + // Likely. + LIKELY = 4; + + // Very likely. + VERY_LIKELY = 5; +} + +// Streaming video annotation feature. +enum StreamingFeature { + // Unspecified. + STREAMING_FEATURE_UNSPECIFIED = 0; + // Label detection. Detect objects, such as dog or flower. + STREAMING_LABEL_DETECTION = 1; + // Shot change detection. + STREAMING_SHOT_CHANGE_DETECTION = 2; + // Explicit content detection. + STREAMING_EXPLICIT_CONTENT_DETECTION = 3; + // Object detection and tracking. + STREAMING_OBJECT_TRACKING = 4; +} |