// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.vision.v1;

import "google/api/annotations.proto";
import "google/cloud/vision/v1/geometry.proto";
import "google/cloud/vision/v1/product_search.proto";
import "google/cloud/vision/v1/text_annotation.proto";
import "google/cloud/vision/v1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/cloud/vision/v1;vision";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1";
option objc_class_prefix = "GCVN";

// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  // Run image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
      returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1/images:annotate"
      body: "*"
    };
  }

  // Run asynchronous image detection and annotation for a list of generic
  // files, such as PDF files, which may contain multiple pages and multiple
  // images per page. Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/files:asyncBatchAnnotate"
      body: "*"
    };
  }
}
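
// As a sketch of the synchronous flow, a client might POST the following
// JSON body to `/v1/images:annotate` (the bucket and object names are
// hypothetical):
//
//     {
//       "requests": [{
//         "image": {"source": {"imageUri": "gs://my-bucket/photo.jpg"}},
//         "features": [{"type": "LABEL_DETECTION", "maxResults": 10}]
//       }]
//     }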

// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;

    // Run face detection.
    FACE_DETECTION = 1;

    // Run landmark detection.
    LANDMARK_DETECTION = 2;

    // Run logo detection.
    LOGO_DETECTION = 3;

    // Run label detection.
    LABEL_DETECTION = 4;

    // Run text detection / optical character recognition (OCR). Text detection
    // is optimized for areas of text within a larger image; if the image is
    // a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;

    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;

    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;

    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;

    // Run crop hints.
    CROP_HINTS = 9;

    // Run web detection.
    WEB_DETECTION = 10;

    // Run Product Search.
    PRODUCT_SEARCH = 12;

    // Run localizer for object detection.
    OBJECT_LOCALIZATION = 19;
  }

  // The feature type.
  Type type = 1;

  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;

  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest".
  string model = 3;
}
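
// For example, a `Feature` requesting dense-document OCR with the stable
// model pinned explicitly (a sketch in JSON form; leaving `model` unset
// would have the same effect):
//
//     {"type": "DOCUMENT_TEXT_DETECTION", "model": "builtin/stable"}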

// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
  // **Use `image_uri` instead.**
  //
  // The Google Cloud Storage URI of the form
  // `gs://bucket_name/object_name`. Object versioning is not supported. See
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
  string gcs_image_uri = 1;

  // The URI of the source image. Can be either:
  //
  // 1. A Google Cloud Storage URI of the form
  //    `gs://bucket_name/object_name`. Object versioning is not supported. See
  //    [Google Cloud Storage Request
  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  //    info.
  //
  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
  //    completed. Your request may fail if the specified host denies the
  //    request (e.g. due to request throttling or DoS prevention), or if Google
  //    throttles requests to the site for abuse prevention. You should not
  //    depend on externally-hosted images for production applications.
  //
  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
  // precedence.
  string image_uri = 2;
}

// Client image to perform Google Cloud Vision API tasks over.
message Image {
  // Image content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  bytes content = 1;

  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}
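
// For example, in the JSON representation an `Image` can be supplied either
// inline or by reference (a sketch; bucket and object names are
// hypothetical). If both are given, `content` takes precedence:
//
//     {"content": "<base64-encoded image bytes>"}
//     {"source": {"imageUri": "gs://my-bucket/photo.jpg"}}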

// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
    // typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;

      // Left eye.
      LEFT_EYE = 1;

      // Right eye.
      RIGHT_EYE = 2;

      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;

      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;

      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;

      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;

      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;

      // Nose tip.
      NOSE_TIP = 8;

      // Upper lip.
      UPPER_LIP = 9;

      // Lower lip.
      LOWER_LIP = 10;

      // Mouth left.
      MOUTH_LEFT = 11;

      // Mouth right.
      MOUTH_RIGHT = 12;

      // Mouth center.
      MOUTH_CENTER = 13;

      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;

      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;

      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;

      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;

      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;

      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;

      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;

      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;

      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;

      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;

      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;

      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;

      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;

      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;

      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;

      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;

      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;

      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;

      // Chin gnathion.
      CHIN_GNATHION = 32;

      // Chin left gonion.
      CHIN_LEFT_GONION = 33;

      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;
    }

    // Face landmark type.
    Type type = 3;

    // Face landmark position.
    Position position = 4;
  }

  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;

  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `bounding_poly`, and encloses only the skin part of the face. Typically,
  // it is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the `fd` (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // Detected face landmarks.
  repeated Landmark landmarks = 3;

  // Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
  // of the face relative to the image vertical about the axis perpendicular to
  // the face. Range [-180,180].
  float roll_angle = 4;

  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image. Range
  // [-180,180].
  float pan_angle = 5;

  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;

  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;

  // Joy likelihood.
  Likelihood joy_likelihood = 9;

  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;

  // Anger likelihood.
  Likelihood anger_likelihood = 11;

  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;

  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;

  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;

  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;
}

// Detected entity location information.
message LocationInfo {
  // Lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}

// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;

  // Value of the property.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}

// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5 [deprecated = true];

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}

// Set of detected objects with bounding boxes.
message LocalizedObjectAnnotation {
  // Object ID that should align with EntityAnnotation mid.
  string mid = 1;

  // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  // information, see
  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  string language_code = 2;

  // Object name, expressed in its `language_code` language.
  string name = 3;

  // Score of the result. Range [0, 1].
  float score = 4;

  // Image region to which this object belongs. This must be populated.
  BoundingPoly bounding_poly = 5;
}

// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;
}

// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}

// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}

// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}

// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant color detection has completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}

// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region.  Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}

// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}

// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333.  If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}

// Parameters for web detection request.
message WebDetectionParams {
  // Whether to include results derived from the geo information in the image.
  bool include_geo_results = 2;
}

// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](/vision/docs/languages).
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for product search.
  ProductSearchParams product_search_params = 5;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;
}
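
// For example, an `ImageContext` hinting that the text is Japanese and
// requesting 16:9 crop hints (a sketch; 16/9 ≈ 1.77778):
//
//     {
//       "languageHints": ["ja"],
//       "cropHintsParams": {"aspectRatios": [1.77778]}
//     }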

// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}

// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;

  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}

// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, localized object detection has completed successfully.
  // This will be sorted descending by confidence score.
  repeated LocalizedObjectAnnotation localized_object_annotations = 22;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If present, product search has completed successfully.
  ProductSearchResults product_search_results = 14;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;

  // If present, contextual information about where this image came from,
  // such as the file and page it was extracted from.
  ImageAnnotationContext context = 21;
}

// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;

  // Individual responses to images found within the file.
  repeated AnnotateImageResponse responses = 2;
}

// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1;
}

// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}

// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}

// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}

// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1;
}

// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}
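
// As a sketch of the asynchronous flow, a client might POST the following
// JSON body to `/v1/files:asyncBatchAnnotate` (bucket names are
// hypothetical), then poll the returned `Operation` through
// `google.longrunning.Operations`:
//
//     {
//       "requests": [{
//         "inputConfig": {
//           "gcsSource": {"uri": "gs://my-bucket/document.pdf"},
//           "mimeType": "application/pdf"
//         },
//         "features": [{"type": "DOCUMENT_TEXT_DETECTION"}],
//         "outputConfig": {
//           "gcsDestination": {"uri": "gs://my-bucket/output/"},
//           "batchSize": 20
//         }
//       }]
//     }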

// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // The type of the file. Currently only "application/pdf" and "image/tiff"
  // are supported. Wildcards are not supported.
  string mime_type = 2;
}

// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;

  // The max number of response protos to put into each output JSON file on
  // Google Cloud Storage.
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one PDF file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 JSON files, each
  // containing 20 response protos, will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential future
  // support for other output configurations.
  int32 batch_size = 2;
}

// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a
  // Google Cloud Storage object. Wildcards are not currently supported.
  string uri = 1;
}

// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI where the results will be stored. Results will
  // be in JSON format, each preceded by its corresponding input URI. This
  // field can either represent a single file, or a prefix for multiple
  // outputs. Prefixes must end in a `/`.
  //
  // Examples:
  //
  // *    File: gs://bucket-name/filename.json
  // *    Prefix: gs://bucket-name/prefix/here/
  // *    File: gs://bucket-name/prefix/here
  //
  // If multiple outputs are written, each response is still an
  // AnnotateFileResponse containing some subset of the full list of
  // AnnotateImageResponse messages. Multiple outputs can happen if, for
  // example, the output JSON is too large and overflows into multiple
  // sharded files.
  string uri = 1;
}

// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    CREATED = 1;

    // Request is actively being processed.
    RUNNING = 2;

    // The batch processing is done.
    DONE = 3;

    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}
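
// For example, while the async batch is in flight, `Operation.metadata`
// might decode to the following `OperationMetadata` (a sketch; timestamps
// are hypothetical):
//
//     {"state": "RUNNING",
//      "createTime": "2018-05-30T12:00:00Z",
//      "updateTime": "2018-05-30T12:00:05Z"}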

// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;

  // It is very unlikely that the image belongs to the specified vertical.
  VERY_UNLIKELY = 1;

  // It is unlikely that the image belongs to the specified vertical.
  UNLIKELY = 2;

  // It is possible that the image belongs to the specified vertical.
  POSSIBLE = 3;

  // It is likely that the image belongs to the specified vertical.
  LIKELY = 4;

  // It is very likely that the image belongs to the specified vertical.
  VERY_LIKELY = 5;
}