aboutsummaryrefslogtreecommitdiff
path: root/google/cloud/datalabeling/v1beta1/annotation.proto
blob: 204ad8e3d87421f4a6d0944306657daf06a25f73 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.datalabeling.v1beta1;

import "google/cloud/datalabeling/v1beta1/annotation_spec_set.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";
import "google/api/annotations.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/datalabeling/v1beta1;datalabeling";
option java_multiple_files = true;
option java_package = "com.google.cloud.datalabeling.v1beta1";


// Specifies where the answer (annotation) comes from.
enum AnnotationSource {
  // Default value; the source of the annotation is not specified.
  ANNOTATION_SOURCE_UNSPECIFIED = 0;

  // NOTE(review): numbers 1 and 2 are not assigned in this enum. If they were
  // used by since-removed values, consider `reserved 1, 2;` -- confirm against
  // the file history.

  // Answer is provided by a human contributor.
  OPERATOR = 3;
}

// Sentiment that an annotation expresses about the data it describes.
enum AnnotationSentiment {
  // Default value; no sentiment is specified.
  ANNOTATION_SENTIMENT_UNSPECIFIED = 0;

  // This annotation describes the data negatively.
  NEGATIVE = 1;

  // This annotation describes the data positively.
  POSITIVE = 2;
}

// The kind of annotation, covering image, video, audio and text data.
//
// The numeric values are not in declaration order; they identify the values
// on the wire and must not be renumbered.
enum AnnotationType {
  // Default value; the annotation type is not specified.
  ANNOTATION_TYPE_UNSPECIFIED = 0;

  // Classification annotations in an image.
  IMAGE_CLASSIFICATION_ANNOTATION = 1;

  // Bounding box annotations in an image.
  IMAGE_BOUNDING_BOX_ANNOTATION = 2;

  // Oriented bounding box annotations in an image. The box does not have to
  // be parallel to the horizontal line.
  IMAGE_ORIENTED_BOUNDING_BOX_ANNOTATION = 13;

  // Bounding poly annotations in an image.
  IMAGE_BOUNDING_POLY_ANNOTATION = 10;

  // Polyline annotations in an image.
  IMAGE_POLYLINE_ANNOTATION = 11;

  // Segmentation annotations in an image.
  IMAGE_SEGMENTATION_ANNOTATION = 12;

  // Classification annotations in video shots.
  VIDEO_SHOTS_CLASSIFICATION_ANNOTATION = 3;

  // Video object tracking annotation.
  VIDEO_OBJECT_TRACKING_ANNOTATION = 4;

  // Video object detection annotation.
  VIDEO_OBJECT_DETECTION_ANNOTATION = 5;

  // Video event annotation.
  VIDEO_EVENT_ANNOTATION = 6;

  // Speech to text annotation.
  AUDIO_TRANSCRIPTION_ANNOTATION = 7;

  // Classification for text.
  TEXT_CLASSIFICATION_ANNOTATION = 8;

  // Entity extraction for text.
  TEXT_ENTITY_EXTRACTION_ANNOTATION = 9;
}

// Annotation for an Example. Each example may have one or more annotations.
// For instance, in an image classification problem each image might have one
// or more labels. The labels bound to that image are called an Annotation.
message Annotation {
  // Output only. Unique name of this annotation, format is:
  //
  // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/{annotated_dataset}/examples/{example_id}/annotations/{annotation_id}
  string name = 1;

  // Output only. The source of the annotation.
  AnnotationSource annotation_source = 2;

  // Output only. This is the actual annotation value, e.g. classification or
  // bounding box values are stored here.
  AnnotationValue annotation_value = 3;

  // Output only. Annotation metadata, including information like votes
  // for labels.
  AnnotationMetadata annotation_metadata = 4;

  // NOTE(review): field number 5 is not assigned in this message. If it was
  // used by a since-removed field, consider `reserved 5;` -- confirm against
  // the file history.

  // Output only. Sentiment for this annotation.
  AnnotationSentiment annotation_sentiment = 6;
}

// Annotation value for an example. Holds one typed payload, selected by the
// kind of annotation.
message AnnotationValue {
  // The annotation payload. At most one of the following fields is set;
  // setting one clears any other.
  oneof value_type {
    // Annotation value for image classification case.
    ImageClassificationAnnotation image_classification_annotation = 1;

    // Annotation value for image bounding box, oriented bounding box
    // and polygon cases.
    ImageBoundingPolyAnnotation image_bounding_poly_annotation = 2;

    // Annotation value for image polyline cases.
    // A polyline differs from a BoundingPoly: it is formed by line segments
    // connected to each other, but it is not a closed form as a bounding poly
    // is. The line segments can cross each other.
    ImagePolylineAnnotation image_polyline_annotation = 8;

    // Annotation value for image segmentation.
    ImageSegmentationAnnotation image_segmentation_annotation = 9;

    // Annotation value for text classification case.
    TextClassificationAnnotation text_classification_annotation = 3;

    // Annotation value for video classification case.
    VideoClassificationAnnotation video_classification_annotation = 4;

    // Annotation value for video object detection and tracking case.
    VideoObjectTrackingAnnotation video_object_tracking_annotation = 5;

    // Annotation value for video event case.
    VideoEventAnnotation video_event_annotation = 6;

    // Annotation value for speech audio recognition case.
    AudioRecognitionAnnotation audio_recognition_annotation = 7;
  }
}

// Image classification annotation definition: a single label applied to an
// image.
message ImageClassificationAnnotation {
  // Label of the image.
  AnnotationSpec annotation_spec = 1;
}

// A vertex represents a 2D point in the image.
// NOTE: the vertex coordinates are in the same scale as the original image.
message Vertex {
  // X coordinate, in the same scale as the original image.
  int32 x = 1;

  // Y coordinate, in the same scale as the original image.
  int32 y = 2;
}

// A normalized vertex represents a 2D point in the image.
// NOTE: the normalized vertex coordinates are relative to the original image
// and range from 0 to 1.
message NormalizedVertex {
  // X coordinate, relative to the original image, in the range 0 to 1.
  float x = 1;

  // Y coordinate, relative to the original image, in the range 0 to 1.
  float y = 2;
}

// A bounding polygon in the image, described by integer vertices.
message BoundingPoly {
  // The bounding polygon vertices.
  repeated Vertex vertices = 1;
}

// A bounding polygon in the image, described by normalized vertices.
message NormalizedBoundingPoly {
  // The bounding polygon normalized vertices.
  repeated NormalizedVertex normalized_vertices = 1;
}

// Image bounding poly annotation. It represents a polygon, which may be a
// bounding box, in the image.
message ImageBoundingPolyAnnotation {
  // The region of the polygon. If it is a bounding box, it is guaranteed to be
  // four points. At most one of the members below is set.
  oneof bounded_area {
    // Region given as a polygon of Vertex points.
    BoundingPoly bounding_poly = 2;

    // Region given as a polygon of NormalizedVertex points.
    NormalizedBoundingPoly normalized_bounding_poly = 3;
  }

  // Label of the object in this bounding polygon.
  AnnotationSpec annotation_spec = 1;
}

// A line made up of multiple line segments, described by integer vertices.
message Polyline {
  // The polyline vertices.
  repeated Vertex vertices = 1;
}

// A line made up of multiple line segments, described by normalized vertices.
message NormalizedPolyline {
  // The normalized polyline vertices.
  repeated NormalizedVertex normalized_vertices = 1;
}

// A polyline for the image annotation, together with its label.
message ImagePolylineAnnotation {
  // The polyline itself. At most one of the members below is set.
  oneof poly {
    // Polyline given as a sequence of Vertex points.
    Polyline polyline = 2;

    // Polyline given as a sequence of NormalizedVertex points.
    NormalizedPolyline normalized_polyline = 3;
  }

  // Label of this polyline.
  AnnotationSpec annotation_spec = 1;
}

// Image segmentation annotation.
message ImageSegmentationAnnotation {
  // The mapping between RGB color and annotation spec. The key is the RGB
  // color written in the format rgb(0, 0, 0). The value is the corresponding
  // AnnotationSpec. Map iteration order is undefined.
  map<string, AnnotationSpec> annotation_colors = 1;

  // Image format.
  // NOTE(review): presumably a MIME type string describing image_bytes --
  // confirm the accepted values.
  string mime_type = 2;

  // A byte string of a full image's color map.
  bytes image_bytes = 3;
}

// Text classification annotation: a single label applied to a text.
message TextClassificationAnnotation {
  // Label of the text.
  AnnotationSpec annotation_spec = 1;
}

// A time period inside of an example that has a time dimension (e.g. video).
// The period is half-open: the start is inclusive and the end is exclusive.
message TimeSegment {
  // Start of the time segment (inclusive), represented as the duration since
  // the example start.
  google.protobuf.Duration start_time_offset = 1;

  // End of the time segment (exclusive), represented as the duration since the
  // example start.
  google.protobuf.Duration end_time_offset = 2;
}

// Video classification annotation: a label applied to a time segment of a
// video.
message VideoClassificationAnnotation {
  // The time segment of the video to which the annotation applies.
  TimeSegment time_segment = 1;

  // Label of the segment specified by time_segment.
  AnnotationSpec annotation_spec = 2;
}

// Video frame level annotation for object detection and tracking.
message ObjectTrackingFrame {
  // The bounding box location of this object track for the frame. At most one
  // of the members below is set.
  oneof bounded_area {
    // Location given as a polygon of Vertex points.
    BoundingPoly bounding_poly = 1;

    // Location given as a polygon of NormalizedVertex points.
    NormalizedBoundingPoly normalized_bounding_poly = 2;
  }

  // The time offset of this frame relative to the beginning of the video.
  google.protobuf.Duration time_offset = 3;
}

// Video object tracking annotation: a labeled object followed across the
// frames of a video time segment.
message VideoObjectTrackingAnnotation {
  // Label of the object tracked in this annotation.
  AnnotationSpec annotation_spec = 1;

  // The time segment of the video to which object tracking applies.
  TimeSegment time_segment = 2;

  // The list of frames where this object track appears.
  repeated ObjectTrackingFrame object_tracking_frames = 3;
}

// Video event annotation: a labeled event within a time segment of a video.
message VideoEventAnnotation {
  // Label of the event in this annotation.
  AnnotationSpec annotation_spec = 1;

  // The time segment of the video to which the annotation applies.
  TimeSegment time_segment = 2;
}

// Speech audio recognition annotation: a transcript and the span of audio it
// corresponds to.
message AudioRecognitionAnnotation {
  // Transcript text representing the words spoken.
  string transcript = 1;

  // Start position in audio file that the transcription corresponds to.
  // NOTE(review): presumably measured from the beginning of the audio file --
  // confirm.
  google.protobuf.Duration start_offset = 2;

  // End position in audio file that the transcription corresponds to.
  // NOTE(review): presumably measured from the beginning of the audio file;
  // whether the end is inclusive or exclusive is not stated -- confirm.
  google.protobuf.Duration end_offset = 3;
}

// Additional information associated with the annotation.
message AnnotationMetadata {
  // NOTE(review): field number 1 is not assigned in this message. If it was
  // used by a since-removed field, consider `reserved 1;` -- confirm against
  // the file history.

  // Metadata related to human labeling.
  OperatorMetadata operator_metadata = 2;
}

// General information useful for labels coming from contributors.
message OperatorMetadata {
  // Confidence score corresponding to a label. For example, if 3 contributors
  // have answered the question and 2 of them agree on the final label, the
  // confidence score will be 0.67 (2/3).
  float score = 1;

  // The total number of contributors that answered this question.
  int32 total_votes = 2;

  // The total number of contributors that chose this label.
  int32 label_votes = 3;

  // Comments attached to this label.
  // NOTE(review): presumably free-form text left by contributors -- confirm.
  repeated string comments = 4;
}