aboutsummaryrefslogtreecommitdiff
path: root/proto/icing/proto/schema.proto
blob: c716dba87c9fb0bf67289fed0da783b1a7a05290 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/status.proto";
import "icing/proto/term.proto";

option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Defines the schema that every Document of a specific "type" should adhere
// to. These can be considered as definitions of rich structured types for
// Documents accepted by IcingSearchEngine.
//
// NOTE: Instances of SchemaTypeConfigProto are strongly recommended to be
// based on types defined in schema.org. This makes the data/config/code more
// shareable and easier to extend in the future.
//
// TODO(cassiewang) Define a sample proto file that can be used by tests and for
// documentation.
//
// Next tag: 7
message SchemaTypeConfigProto {
  // REQUIRED: Named type that uniquely identifies the structured, logical
  // schema being defined.
  //
  // Recommended format: Human readable string that's one of the types defined
  // in http://schema.org. Eg: DigitalDocument, Message, Person, etc.
  optional string schema_type = 1;

  // List of all properties that are supported by Documents of this type.
  // A Document should never have properties that are not listed here.
  //
  // TODO(cassiewang) Figure out if we should disallow, ignore or accept
  // unknown properties. Accepting them could make switching between versions
  // easier.
  repeated PropertyConfigProto properties = 4;

  // Version is an arbitrary number that the client may use to keep track of
  // different incarnations of the schema. Icing library imposes no requirements
  // on this field and will not validate it in any way. If a client calls
  // SetSchema with a schema that contains one or more new version numbers, then
  // those version numbers will be updated so long as the SetSchema call
  // succeeds. Clients are free to leave the version number unset, in which case
  // it will default to value == 0.
  optional int32 version = 5;

  // An experimental field that makes this type a subtype of each type listed
  // in parent_types, which enables parent_types to be interpreted as its
  // subtypes in the context of the Search APIs, including schema type filters
  // and projections specified in TypePropertyMask.
  repeated string parent_types = 6;

  // Tags 2 and 3 are reserved and must not be assigned to new fields.
  reserved 2, 3;
}

// Describes how a string property should be indexed.
// Next tag: 3
message StringIndexingConfig {
  // Indicates how the content of this property should be matched in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // Content in this property will not be tokenized or indexed. Useful if the
  // data type is not indexable. See schema-util for details.
  //
  // TermMatchType.Code=EXACT_ONLY
  // Content in this property should only be returned for queries matching the
  // exact tokens appearing in this property.
  // Ex. A property with "fool" should NOT match a query for "foo".
  //
  // TermMatchType.Code=PREFIX
  // Content in this property should be returned for queries that are either
  // exact matches or prefix matches of the tokens appearing in this property.
  // Ex. A property with "fool" *should* match a query for "foo".
  optional TermMatchType.Code term_match_type = 1;

  // Wrapper message that scopes the tokenizer Code enum.
  message TokenizerType {
    enum Code {
      // It is only valid for tokenizer_type to be 'NONE' if the data type is
      // not indexed.
      NONE = 0;

      // Tokenization for plain text.
      PLAIN = 1;

      // Tokenizes text verbatim. This means no normalization or segmentation
      // is applied to string values that are tokenized using this type.
      // Therefore, the output token is equivalent to the raw string text. For
      // example, "Hello, world!" would be tokenized as "Hello, world!"
      // preserving punctuation and capitalization, and not creating separate
      // tokens between the space.
      VERBATIM = 2;

      // Tokenizes text as an email address. This means it will tokenize a
      // string into multiple emails, and further tokenize those into parts of
      // an email address. These parts include the local address, host
      // components, local components, as well as the name and comments. For
      // example, "User (comment) <user@domain.com>" would be tokenized into a
      // "User" name token, a "comment" comment token, a "user" local address, a
      // "user" local component token, a "domain" host component token, a "com"
      // host component token, a "user@domain.com" address token, and the entire
      // original string as an rfc822 token.
      // See more here: https://datatracker.ietf.org/doc/html/rfc822
      RFC822 = 3;

      // Tokenizes text as a url address. This tokenizes a url string into a
      // token for each component in the url, as well as any significant
      // url suffixes. For example,
      // https://www.google.com/path/subpath?query#ref would be tokenized into a
      // scheme token "https"; 3 host tokens "www", "google", "com"; 2 path
      // tokens "path", "subpath"; a query token "query"; a reference token
      // "ref"; and 3 suffix tokens
      // "https://www.google.com/path/subpath?query#ref",
      // "www.google.com/path/subpath?query#ref",
      // "google.com/path/subpath?query#ref".
      // Currently only supports tokenization of one url string at a time,
      // i.e. the input string cannot have spaces in the middle, but can have
      // leading or trailing spaces.
      URL = 4;
    }
  }
  // Indicates how the content of this property should be tokenized. See
  // TokenizerType.Code for the meaning of each value.
  optional TokenizerType.Code tokenizer_type = 2;
}

// Describes how a document property should be indexed.
// Next tag: 3
message DocumentIndexingConfig {
  // OPTIONAL: Whether nested properties within the document property should be
  // indexed. If true, then all nested properties will be indexed according to
  // each nested property's own indexing configuration. If false, nested
  // documents' properties will not be indexed even if they have an indexing
  // configuration.
  //
  // The default value is false.
  optional bool index_nested_properties = 1;

  // List of nested properties within the document to index. Only the
  // properties in this list will be indexed, each according to its own
  // indexing configuration.
  //
  // index_nested_properties must be false in order to use this feature.
  //
  // NOTE(review): the expected format of each entry (e.g. a property path
  // relative to the nested document) is not specified here -- confirm.
  repeated string indexable_nested_properties_list = 2;
}

// Describes how an int64 property should be indexed.
// Next tag: 3
//
// NOTE(review): only tag 1 is declared in this message; confirm whether tag 2
// is intentionally skipped before assigning it to a new field.
message IntegerIndexingConfig {
  // OPTIONAL: Indicates how the int64 contents of this property should be
  // matched.
  //
  // The default value is UNKNOWN.
  message NumericMatchType {
    enum Code {
      // Contents in this property will not be indexed. Useful if the int64
      // property type is not indexable.
      UNKNOWN = 0;

      // Contents in this property should only be returned for queries matching
      // the range.
      RANGE = 1;
    }
  }
  // The match type applied to this property; see NumericMatchType.Code.
  optional NumericMatchType.Code numeric_match_type = 1;
}

// Describes how a property can be used to join this document with another
// document. See JoinSpecProto (in search.proto) for more details.
// Next tag: 3
message JoinableConfig {
  // OPTIONAL: Indicates what joinable type the content value of this property
  // is.
  //
  // The default value is NONE.
  message ValueType {
    enum Code {
      // Value in this property is not joinable.
      NONE = 0;

      // Value in this property is a joinable (string) qualified id, which is
      // composed of namespace and uri.
      // See JoinSpecProto (in search.proto) and DocumentProto (in
      // document.proto) for more details about qualified id, namespace and uri.
      QUALIFIED_ID = 1;
    }
  }
  // The joinable value type of this property; see ValueType.Code.
  optional ValueType.Code value_type = 1;

  // OPTIONAL: If true, then when the parent document that a child document is
  // joined to is deleted, the child document is deleted as well. This will
  // only apply to children joined through QUALIFIED_ID; other (future)
  // joinable value types won't use it.
  //
  // The default value is false.
  optional bool propagate_delete = 2 [default = false];
}

// Describes the schema of a single property of Documents that belong to a
// specific SchemaTypeConfigProto. These can be considered as a rich, structured
// type for each property of Documents accepted by IcingSearchEngine.
// Next tag: 9
message PropertyConfigProto {
  // REQUIRED: Name that uniquely identifies a property within a Document of
  // a specific SchemaTypeConfigProto.
  //
  // Recommended format: Human readable string that's one of the properties
  // defined in schema.org for the parent SchemaTypeConfigProto.
  // Eg: 'author' for http://schema.org/DigitalDocument.
  // Eg: 'address' for http://schema.org/Place.
  optional string property_name = 1;

  // REQUIRED: Physical data-types of the contents of the property.
  message DataType {
    enum Code {
      // This value should never purposely be used. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      STRING = 1;
      INT64 = 2;
      DOUBLE = 3;
      BOOLEAN = 4;

      // Unstructured BLOB.
      BYTES = 5;

      // Indicates that the property itself is a Document, making it part of
      // a hierarchical Document schema. Any property using this data_type
      // MUST have a valid 'schema_type'.
      DOCUMENT = 6;
    }
  }
  // The data type of this property; see DataType.Code.
  optional DataType.Code data_type = 2;

  // REQUIRED if (data_type == DOCUMENT). OPTIONAL otherwise.
  // Indicates the logical schema-type of the contents of this property.
  //
  // TODO(cassiewang): This could be useful for non-document properties, e.g.
  // to set this field as a schema.org/address for some string property.
  // Re-evaluate what recommendation we should give clients if we want to start
  // using this for non-document properties as well.
  //
  // Recommended format: Human readable string that is one of the types defined
  // in schema.org, matching the SchemaTypeConfigProto.schema_type of another
  // type.
  optional string schema_type = 3;

  // REQUIRED: The cardinality of the property.
  message Cardinality {
    // NOTE: The order of the cardinality is purposefully set to be from least
    // restrictive (REPEATED) to most restrictive (REQUIRED). This makes it
    // easier to check if a field is backwards compatible by doing a simple
    // greater-than/less-than check on the enum ints. Changing/adding new
    // cardinalities should be done cautiously.
    enum Code {
      // This should never purposely be set. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      // Any number of items (including zero) [0...*].
      REPEATED = 1;

      // Zero or one value [0,1].
      OPTIONAL = 2;

      // Exactly one value [1].
      REQUIRED = 3;
    }
  }
  // The cardinality of this property; see Cardinality.Code.
  optional Cardinality.Code cardinality = 4;

  // OPTIONAL: Describes how string properties should be indexed. String
  // properties that do not set the indexing config will not be indexed.
  optional StringIndexingConfig string_indexing_config = 5;

  // OPTIONAL: Describes how document properties should be indexed.
  optional DocumentIndexingConfig document_indexing_config = 6;

  // OPTIONAL: Describes how int64 properties should be indexed. Int64
  // properties that do not set the indexing config will not be indexed.
  optional IntegerIndexingConfig integer_indexing_config = 7;

  // OPTIONAL: Describes how string properties can be used as a document joining
  // matcher.
  //
  // Note: currently we only support STRING single joining, so if a property is
  // set as joinable (i.e. joinable_config.value_type is not NONE), then:
  // - DataType should be STRING. Otherwise joinable_config will be ignored.
  // - The property itself and any upper-level (nested doc) property should
  //   contain at most one element (i.e. Cardinality is OPTIONAL or REQUIRED).
  optional JoinableConfig joinable_config = 8;
}

// The list of all supported types, which together constitute the schema used
// by Icing.
// Next tag: 2
message SchemaProto {
  // One entry per schema type defined in this schema.
  repeated SchemaTypeConfigProto types = 1;
}

// Result of a call to IcingSearchEngine.SetSchema
// Next tag: 9
message SetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   INVALID_ARGUMENT
  //   FAILED_PRECONDITION
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, +WARNING_DATA_LOSS,
  // -INTERNAL. go/icing-library-apis.
  optional StatusProto status = 1;

  // Schema types that existed in the previous schema, but were deleted from the
  // new schema. If ignore_errors_and_delete_documents=true, then all documents
  // of these types were also deleted.
  repeated string deleted_schema_types = 2;

  // Schema types that existed in the previous schema and were incompatible with
  // the new schema type. If ignore_errors_and_delete_documents=true, then any
  // documents that fail validation against the new schema types would also be
  // deleted.
  repeated string incompatible_schema_types = 3;

  // Schema types that did not exist in the previous schema and were added with
  // the new schema type.
  repeated string new_schema_types = 4;

  // Schema types that were changed in a way that was backwards compatible and
  // didn't invalidate the index.
  repeated string fully_compatible_changed_schema_types = 5;

  // Schema types that were changed in a way that was backwards compatible, but
  // invalidated the index.
  repeated string index_incompatible_changed_schema_types = 6;

  // Overall time used for the function call, in milliseconds.
  optional int32 latency_ms = 7;

  // Schema types that were changed in a way that was backwards compatible, but
  // invalidated the joinable cache.
  //
  // For example, a property was set as non-joinable in the old schema
  // definition, but changed to joinable in the new definition. In this case,
  // this property will be considered join incompatible when setting the new
  // schema.
  repeated string join_incompatible_changed_schema_types = 8;
}

// Result of a call to IcingSearchEngine.GetSchema
// Next tag: 3
message GetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the Schema proto. Modifying this does not affect the Schema that
  // IcingSearchEngine holds.
  //
  // NOTE(review): presumably only populated when status is OK -- confirm.
  optional SchemaProto schema = 2;
}

// Result of a call to IcingSearchEngine.GetSchemaType
// Next tag: 3
message GetSchemaTypeResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the SchemaTypeConfig proto with the specified schema_type.
  // Modifying this does not affect the SchemaTypeConfig that IcingSearchEngine
  // holds.
  //
  // NOTE(review): presumably only populated when status is OK -- confirm.
  optional SchemaTypeConfigProto schema_type_config = 2;
}
}