aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim <tjbarron@google.com>2023-05-11 20:56:04 +0000
committerTim <tjbarron@google.com>2023-05-11 21:23:49 +0000
commitb3b664dc373ddd2ff1c004cdcafd5c04bf82bdd5 (patch)
treecc621f7731dc8af9b55ae2356c14fe3e125cd5d2
parent72d892535dfa5dca1366ae7b34b29bcd236bc0c7 (diff)
parentfb6eb3c7c025b798b13ae36b923aac8c6ebe24bd (diff)
downloadicing-b3b664dc373ddd2ff1c004cdcafd5c04bf82bdd5.tar.gz
Merge remote-tracking branch 'goog/upstream-master' into androidx-platform-dev
Update Icing from upstream. Descriptions: ======================================================================== Handle version changes in the schema store. ======================================================================== Modify the definition of propertyDefined: ======================================================================== Remove default args in SchemaStore::SetSchema and fix calls ======================================================================== Add allow_circular_schema_definitions flag ======================================================================== Onboard version detection to Icing ======================================================================== Add version util to help read/write version info ======================================================================== Add support for the overlay schema. ======================================================================== Allow cycles in schema-property-iterator ======================================================================== Add joinable properties into schema definition cycle restrictions. ======================================================================== Loosen circular references restriction for Schema Definitions. 
======================================================================== Implement BackupSchemaProducer to generate a backup schema ======================================================================== Minor fix: remove a redundant log ======================================================================== Allow schema types to inherit from more than one parent ======================================================================== allow nested document properties to accept documents of subtype ======================================================================== Support polymorphism for Icing projection in Search and Get API ======================================================================== Add max_joined_child_per_parent into ResultSpec and change behavior ======================================================================== Support Icing schema type polymorphism for the search filter API ======================================================================== Verify that every child type's property set has included all compatible properties from parent types ======================================================================== Add individual type index latency ======================================================================== Build the iterator node for the propertyDefined() custom function ======================================================================== Advance all hits with same doc id from and merge sections once for the same bucket iter ======================================================================== Introduce DocHitInfoIteratorPropertyInSchema for property existence check ======================================================================== Add SchemaUtil::BuildTransitiveInheritanceGraph to build an inheritance map from schema ======================================================================== Introduce a lookup method for a property defined in a schema 
======================================================================== Rollback of: Allow LanguageSegmenter::Iterators to declare AccessType. ======================================================================== Adds join info to QueryStatsProto ======================================================================== Bug:280698419 Bug:280698125 Bug:280698121 Bug:280697513 Bug:276349029 Bug:272145329 Bug:270102295 Bug:269295094 Bug:268680462 Bug:265304217 Bug:259744228 Bug:259743562 Bug:256022027 * goog/upstream-master: Update Icing from upstream. Update Icing from upstream. Change-Id: Ia9c5c88bf8e43122204acc8f5231fb8bf65019e3
-rw-r--r--icing/file/posting_list/flash-index-storage.cc21
-rw-r--r--icing/file/posting_list/flash-index-storage.h11
-rw-r--r--icing/file/posting_list/flash-index-storage_test.cc46
-rw-r--r--icing/file/version-util.cc105
-rw-r--r--icing/file/version-util.h97
-rw-r--r--icing/file/version-util_test.cc386
-rw-r--r--icing/icing-search-engine.cc142
-rw-r--r--icing/icing-search-engine.h9
-rw-r--r--icing/icing-search-engine_backwards_compatibility_test.cc34
-rw-r--r--icing/icing-search-engine_benchmark.cc3
-rw-r--r--icing/icing-search-engine_initialization_test.cc517
-rw-r--r--icing/icing-search-engine_optimize_test.cc30
-rw-r--r--icing/icing-search-engine_schema_test.cc135
-rw-r--r--icing/icing-search-engine_search_test.cc448
-rw-r--r--icing/icing-search-engine_test.cc402
-rw-r--r--icing/index/index-processor_benchmark.cc4
-rw-r--r--icing/index/index-processor_test.cc18
-rw-r--r--icing/index/index.cc6
-rw-r--r--icing/index/index.h10
-rw-r--r--icing/index/integer-section-indexing-handler.cc14
-rw-r--r--icing/index/integer-section-indexing-handler_test.cc4
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter.cc11
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter_test.cc174
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc114
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-property-in-schema.h76
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc263
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc4
-rw-r--r--icing/index/iterator/doc-hit-info-iterator.h7
-rw-r--r--icing/index/main/main-index.cc13
-rw-r--r--icing/index/main/main-index.h10
-rw-r--r--icing/index/numeric/integer-index-storage.cc11
-rw-r--r--icing/index/numeric/integer-index-storage_benchmark.cc124
-rw-r--r--icing/index/numeric/integer-index_test.cc20
-rw-r--r--icing/index/string-section-indexing-handler.cc5
-rw-r--r--icing/join/join-processor.cc17
-rw-r--r--icing/join/join-processor_test.cc133
-rw-r--r--icing/join/qualified-id-join-indexing-handler.cc (renamed from icing/join/qualified-id-joinable-property-indexing-handler.cc)25
-rw-r--r--icing/join/qualified-id-join-indexing-handler.h (renamed from icing/join/qualified-id-joinable-property-indexing-handler.h)23
-rw-r--r--icing/join/qualified-id-join-indexing-handler_test.cc (renamed from icing/join/qualified-id-joinable-property-indexing-handler_test.cc)73
-rw-r--r--icing/query/advanced_query_parser/query-visitor.cc28
-rw-r--r--icing/query/advanced_query_parser/query-visitor_test.cc227
-rw-r--r--icing/query/query-features.h11
-rw-r--r--icing/query/query-processor.cc1
-rw-r--r--icing/query/query-processor_benchmark.cc16
-rw-r--r--icing/query/query-processor_test.cc195
-rw-r--r--icing/query/suggestion-processor_test.cc55
-rw-r--r--icing/result/projection-tree.cc5
-rw-r--r--icing/result/projection-tree.h6
-rw-r--r--icing/result/projection-tree_test.cc86
-rw-r--r--icing/result/result-adjustment-info.cc10
-rw-r--r--icing/result/result-adjustment-info.h2
-rw-r--r--icing/result/result-adjustment-info_test.cc73
-rw-r--r--icing/result/result-retriever-v2.cc14
-rw-r--r--icing/result/result-retriever-v2.h3
-rw-r--r--icing/result/result-retriever-v2_group-result-limiter_test.cc4
-rw-r--r--icing/result/result-retriever-v2_projection_test.cc488
-rw-r--r--icing/result/result-retriever-v2_snippet_test.cc25
-rw-r--r--icing/result/result-retriever-v2_test.cc172
-rw-r--r--icing/result/result-state-manager_test.cc13
-rw-r--r--icing/result/result-state-manager_thread-safety_test.cc4
-rw-r--r--icing/result/result-state-v2.cc2
-rw-r--r--icing/result/result-state-v2.h14
-rw-r--r--icing/result/result-state-v2_test.cc17
-rw-r--r--icing/result/snippet-retriever.cc4
-rw-r--r--icing/result/snippet-retriever_benchmark.cc8
-rw-r--r--icing/result/snippet-retriever_test.cc40
-rw-r--r--icing/schema-builder.h4
-rw-r--r--icing/schema/backup-schema-producer.cc164
-rw-r--r--icing/schema/backup-schema-producer.h55
-rw-r--r--icing/schema/backup-schema-producer_test.cc630
-rw-r--r--icing/schema/schema-property-iterator.cc10
-rw-r--r--icing/schema/schema-property-iterator.h3
-rw-r--r--icing/schema/schema-property-iterator_test.cc479
-rw-r--r--icing/schema/schema-store.cc533
-rw-r--r--icing/schema/schema-store.h187
-rw-r--r--icing/schema/schema-store_test.cc1900
-rw-r--r--icing/schema/schema-type-manager.cc1
-rw-r--r--icing/schema/schema-type-manager.h7
-rw-r--r--icing/schema/schema-type-manager_test.cc8
-rw-r--r--icing/schema/schema-util.cc459
-rw-r--r--icing/schema/schema-util.h116
-rw-r--r--icing/schema/schema-util_test.cc2143
-rw-r--r--icing/schema/section.h2
-rw-r--r--icing/scoring/advanced_scoring/advanced-scorer_test.cc4
-rw-r--r--icing/scoring/score-and-rank_benchmark.cc28
-rw-r--r--icing/scoring/scorer_test.cc4
-rw-r--r--icing/scoring/scoring-processor_test.cc4
-rw-r--r--icing/scoring/section-weights_test.cc4
-rw-r--r--icing/store/document-store.cc39
-rw-r--r--icing/store/document-store.h8
-rw-r--r--icing/store/document-store_benchmark.cc4
-rw-r--r--icing/store/document-store_test.cc244
-rw-r--r--icing/testing/numeric/normal-distribution-number-generator.h42
-rw-r--r--icing/testing/numeric/uniform-distribution-integer-generator.h2
-rw-r--r--icing/tokenization/icu/icu-language-segmenter.cc5
-rw-r--r--icing/tokenization/icu/icu-language-segmenter.h2
-rw-r--r--icing/tokenization/icu/icu-language-segmenter_test.cc189
-rw-r--r--icing/tokenization/language-segmenter-iterator_test.cc51
-rw-r--r--icing/tokenization/language-segmenter.h7
-rw-r--r--icing/tokenization/language-segmenter_benchmark.cc15
-rw-r--r--icing/tokenization/plain-tokenizer.cc10
-rw-r--r--icing/tokenization/plain-tokenizer.h3
-rw-r--r--icing/tokenization/plain-tokenizer_test.cc33
-rw-r--r--icing/tokenization/raw-query-tokenizer.cc3
-rw-r--r--icing/tokenization/raw-query-tokenizer.h2
-rw-r--r--icing/tokenization/raw-query-tokenizer_test.cc7
-rw-r--r--icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc21
-rw-r--r--icing/tokenization/reverse_jni/reverse-jni-break-iterator.h14
-rw-r--r--icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc18
-rw-r--r--icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h3
-rw-r--r--icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc184
-rw-r--r--icing/tokenization/rfc822-tokenizer.cc7
-rw-r--r--icing/tokenization/rfc822-tokenizer.h3
-rw-r--r--icing/tokenization/rfc822-tokenizer_test.cc16
-rw-r--r--icing/tokenization/tokenizer.h21
-rw-r--r--icing/tokenization/verbatim-tokenizer.cc7
-rw-r--r--icing/tokenization/verbatim-tokenizer.h3
-rw-r--r--icing/tokenization/verbatim-tokenizer_test.cc31
-rw-r--r--icing/util/document-validator.cc18
-rw-r--r--icing/util/document-validator_test.cc149
-rw-r--r--icing/util/tokenized-document.cc6
-rw-r--r--icing/util/tokenized-document_test.cc6
-rw-r--r--proto/icing/proto/initialize.proto16
-rw-r--r--proto/icing/proto/logging.proto18
-rw-r--r--proto/icing/proto/schema.proto8
-rw-r--r--proto/icing/proto/search.proto11
-rw-r--r--synced_AOSP_CL_number.txt2
127 files changed, 11433 insertions, 1608 deletions
diff --git a/icing/file/posting_list/flash-index-storage.cc b/icing/file/posting_list/flash-index-storage.cc
index 2ba24a3..cd7ac12 100644
--- a/icing/file/posting_list/flash-index-storage.cc
+++ b/icing/file/posting_list/flash-index-storage.cc
@@ -52,6 +52,27 @@ libtextclassifier3::StatusOr<FlashIndexStorage> FlashIndexStorage::Create(
return storage;
}
+/* static */ libtextclassifier3::StatusOr<int>
+FlashIndexStorage::ReadHeaderMagic(const Filesystem* filesystem,
+ const std::string& index_filename) {
+ ICING_RETURN_ERROR_IF_NULL(filesystem);
+
+ if (!filesystem->FileExists(index_filename.c_str())) {
+ return absl_ports::NotFoundError("Flash index file doesn't exist");
+ }
+
+ ScopedFd sfd(filesystem->OpenForRead(index_filename.c_str()));
+ if (!sfd.is_valid()) {
+ return absl_ports::InternalError("Fail to open flash index file");
+ }
+
+ uint32_t block_size = SelectBlockSize();
+ // Read and validate header.
+ ICING_ASSIGN_OR_RETURN(HeaderBlock header_block,
+ HeaderBlock::Read(filesystem, sfd.get(), block_size));
+ return header_block.header()->magic;
+}
+
FlashIndexStorage::~FlashIndexStorage() {
if (header_block_ != nullptr) {
FlushInMemoryFreeList();
diff --git a/icing/file/posting_list/flash-index-storage.h b/icing/file/posting_list/flash-index-storage.h
index 05feb08..378b2dc 100644
--- a/icing/file/posting_list/flash-index-storage.h
+++ b/icing/file/posting_list/flash-index-storage.h
@@ -98,6 +98,17 @@ class FlashIndexStorage {
std::string index_filename, const Filesystem* filesystem,
PostingListSerializer* serializer, bool in_memory = true);
+ // Reads magic from existing file header. We need this during Icing
+ // initialization phase to determine the version.
+ //
+ // RETURNS:
+ // - On success, a valid magic
+ // - FAILED_PRECONDITION_ERROR if filesystem is null
+ // - NOT_FOUND_ERROR if the flash index file doesn't exist
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::StatusOr<int> ReadHeaderMagic(
+ const Filesystem* filesystem, const std::string& index_filename);
+
FlashIndexStorage(FlashIndexStorage&&) = default;
FlashIndexStorage(const FlashIndexStorage&) = delete;
FlashIndexStorage& operator=(FlashIndexStorage&&) = default;
diff --git a/icing/file/posting_list/flash-index-storage_test.cc b/icing/file/posting_list/flash-index-storage_test.cc
index e63f5b0..3e2d239 100644
--- a/icing/file/posting_list/flash-index-storage_test.cc
+++ b/icing/file/posting_list/flash-index-storage_test.cc
@@ -26,6 +26,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage-header.h"
#include "icing/index/hit/hit.h"
#include "icing/index/main/posting-list-hit-serializer.h"
#include "icing/store/document-id.h"
@@ -42,6 +43,7 @@ using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::IsFalse;
using ::testing::IsTrue;
+using ::testing::Ne;
using ::testing::Not;
class FlashIndexStorageTest : public testing::Test {
@@ -67,6 +69,50 @@ class FlashIndexStorageTest : public testing::Test {
std::unique_ptr<PostingListHitSerializer> serializer_;
};
+TEST_F(FlashIndexStorageTest, ReadHeaderMagic) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ }
+ EXPECT_THAT(FlashIndexStorage::ReadHeaderMagic(&filesystem_, file_name_),
+ IsOkAndHolds(HeaderBlock::Header::kMagic));
+}
+
+TEST_F(FlashIndexStorageTest, ReadHeaderMagicOldVersion) {
+ int block_size;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FlashIndexStorage flash_index_storage,
+ FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
+ block_size = flash_index_storage.block_size();
+ }
+
+ int old_magic = 0x6dfba6ae;
+ ASSERT_THAT(old_magic, Ne(HeaderBlock::Header::kMagic));
+ {
+ // Manually modify the header magic.
+ ScopedFd sfd(filesystem_.OpenForWrite(file_name_.c_str()));
+ ASSERT_THAT(sfd.is_valid(), IsTrue());
+
+ // Read and validate header.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ HeaderBlock header_block,
+ HeaderBlock::Read(&filesystem_, sfd.get(), block_size));
+ header_block.header()->magic = old_magic;
+ ASSERT_THAT(header_block.Write(sfd.get()), IsTrue());
+ }
+
+ EXPECT_THAT(FlashIndexStorage::ReadHeaderMagic(&filesystem_, file_name_),
+ IsOkAndHolds(old_magic));
+}
+
+TEST_F(FlashIndexStorageTest,
+ ReadHeaderMagicNonExistingFileShouldGetNotFoundError) {
+ EXPECT_THAT(FlashIndexStorage::ReadHeaderMagic(&filesystem_, file_name_),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
TEST_F(FlashIndexStorageTest, CorruptHeader) {
{
// Create the header file
diff --git a/icing/file/version-util.cc b/icing/file/version-util.cc
new file mode 100644
index 0000000..468bde5
--- /dev/null
+++ b/icing/file/version-util.cc
@@ -0,0 +1,105 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/version-util.h"
+
+#include <cstdint>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/index.h"
+
+namespace icing {
+namespace lib {
+
+namespace version_util {
+
+libtextclassifier3::StatusOr<VersionInfo> ReadVersion(
+ const Filesystem& filesystem, const std::string& version_file_path,
+ const std::string& index_base_dir) {
+ // 1. Read the version info.
+ VersionInfo existing_version_info(-1, -1);
+ if (filesystem.FileExists(version_file_path.c_str()) &&
+ !filesystem.PRead(version_file_path.c_str(), &existing_version_info,
+ sizeof(VersionInfo), /*offset=*/0)) {
+ return absl_ports::InternalError("Fail to read version");
+ }
+
+ // 2. Check the Index magic to see if we're actually on version 0.
+ libtextclassifier3::StatusOr<int> existing_flash_index_magic_or =
+ Index::ReadFlashIndexMagic(&filesystem, index_base_dir);
+ if (!existing_flash_index_magic_or.ok()) {
+ if (absl_ports::IsNotFound(existing_flash_index_magic_or.status())) {
+ // Flash index magic doesn't exist. In this case, we're unable to
+ // determine the version change state correctly (regardless of the
+ // existence of the version file), so invalidate VersionInfo by setting
+ // version to -1, but still keep the max_version value read in step 1.
+ existing_version_info.version = -1;
+ return existing_version_info;
+ }
+ // Real error.
+ return std::move(existing_flash_index_magic_or).status();
+ }
+ if (existing_flash_index_magic_or.ValueOrDie() ==
+ kVersionZeroFlashIndexMagic) {
+ existing_version_info.version = 0;
+ if (existing_version_info.max_version == -1) {
+ existing_version_info.max_version = 0;
+ }
+ }
+
+ return existing_version_info;
+}
+
+libtextclassifier3::Status WriteVersion(const Filesystem& filesystem,
+ const std::string& version_file_path,
+ const VersionInfo& version_info) {
+ if (!filesystem.PWrite(version_file_path.c_str(), /*offset=*/0, &version_info,
+ sizeof(VersionInfo))) {
+ return absl_ports::InternalError("Fail to write version");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
+ int32_t curr_version) {
+ if (!existing_version_info.IsValid()) {
+ return StateChange::kUndetermined;
+ }
+
+ if (existing_version_info.version == 0) {
+ return (existing_version_info.max_version == existing_version_info.version)
+ ? StateChange::kVersionZeroUpgrade
+ : StateChange::kVersionZeroRollForward;
+ }
+
+ if (existing_version_info.version == curr_version) {
+ return StateChange::kCompatible;
+ } else if (existing_version_info.version > curr_version) {
+ return StateChange::kRollBack;
+ } else { // existing_version_info.version < curr_version
+ return (existing_version_info.max_version == existing_version_info.version)
+ ? StateChange::kUpgrade
+ : StateChange::kRollForward;
+ }
+}
+
+} // namespace version_util
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/version-util.h b/icing/file/version-util.h
new file mode 100644
index 0000000..7fa7fbd
--- /dev/null
+++ b/icing/file/version-util.h
@@ -0,0 +1,97 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_VERSION_UTIL_H_
+#define ICING_FILE_VERSION_UTIL_H_
+
+#include <cstdint>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+
+namespace icing {
+namespace lib {
+
+namespace version_util {
+
+// - Version 0: Android T. Can be identified only by flash index magic.
+// - Version 1: mainline release 2023-06.
+inline static constexpr int32_t kVersion = 1;
+inline static constexpr int32_t kVersionOne = 1;
+
+inline static constexpr int kVersionZeroFlashIndexMagic = 0x6dfba6ae;
+
+struct VersionInfo {
+ int32_t version;
+ int32_t max_version;
+
+ explicit VersionInfo(int32_t version_in, int32_t max_version_in)
+ : version(version_in), max_version(max_version_in) {}
+
+ bool IsValid() const { return version >= 0 && max_version >= 0; }
+
+ bool operator==(const VersionInfo& other) const {
+ return version == other.version && max_version == other.max_version;
+ }
+} __attribute__((packed));
+static_assert(sizeof(VersionInfo) == 8, "");
+
+enum class StateChange {
+ kUndetermined,
+ kCompatible,
+ kRollForward,
+ kRollBack,
+ kUpgrade,
+ kVersionZeroUpgrade,
+ kVersionZeroRollForward,
+};
+
+// Helper method to read version info (using version file and flash index header
+// magic) from the existing data. If the state is invalid (e.g. flash index
+// header file is missing), then return an invalid VersionInfo.
+//
+// RETURNS:
+// - Existing data's VersionInfo on success
+// - INTERNAL_ERROR on I/O errors
+libtextclassifier3::StatusOr<VersionInfo> ReadVersion(
+ const Filesystem& filesystem, const std::string& version_file_path,
+ const std::string& index_base_dir);
+
+// Helper method to write version file.
+//
+// RETURNS:
+// - OK on success
+// - INTERNAL_ERROR on I/O errors
+libtextclassifier3::Status WriteVersion(const Filesystem& filesystem,
+ const std::string& version_file_path,
+ const VersionInfo& version_info);
+
+// Helper method to determine the change state between the existing data version
+// and the current code version.
+//
+// REQUIRES: curr_version > 0. We implement version checking in version 1, so
+// the callers (except unit tests) will always use a version # greater than 0.
+//
+// RETURNS: StateChange
+StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
+ int32_t curr_version = kVersion);
+
+} // namespace version_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_VERSION_UTIL_H_
diff --git a/icing/file/version-util_test.cc b/icing/file/version-util_test.cc
new file mode 100644
index 0000000..78cdb7d
--- /dev/null
+++ b/icing/file/version-util_test.cc
@@ -0,0 +1,386 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/version-util.h"
+
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/posting_list/flash-index-storage-header.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+namespace version_util {
+
+namespace {
+
+using ::testing::Eq;
+
+struct VersionUtilReadVersionTestParam {
+ std::optional<VersionInfo> existing_version_info;
+ std::optional<int> existing_flash_index_magic;
+ VersionInfo expected_version_info;
+
+ explicit VersionUtilReadVersionTestParam(
+ std::optional<VersionInfo> existing_version_info_in,
+ std::optional<int> existing_flash_index_magic_in,
+ VersionInfo expected_version_info_in)
+ : existing_version_info(std::move(existing_version_info_in)),
+ existing_flash_index_magic(std::move(existing_flash_index_magic_in)),
+ expected_version_info(std::move(expected_version_info_in)) {}
+};
+
+class VersionUtilReadVersionTest
+ : public ::testing::TestWithParam<VersionUtilReadVersionTestParam> {
+ protected:
+ void SetUp() override {
+ base_dir_ = GetTestTempDir() + "/version_util_test";
+ version_file_path_ = base_dir_ + "/version";
+ index_path_ = base_dir_ + "/index";
+
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()));
+ }
+
+ void TearDown() override {
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(base_dir_.c_str()));
+ }
+
+ const Filesystem& filesystem() const { return filesystem_; }
+
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string version_file_path_;
+ std::string index_path_;
+};
+
+TEST_P(VersionUtilReadVersionTest, ReadVersion) {
+ const VersionUtilReadVersionTestParam& param = GetParam();
+
+ // Prepare version file and flash index file.
+ if (param.existing_version_info.has_value()) {
+ ICING_ASSERT_OK(WriteVersion(filesystem_, version_file_path_,
+ param.existing_version_info.value()));
+ }
+
+ if (param.existing_flash_index_magic.has_value()) {
+ HeaderBlock header_block(&filesystem_, /*block_size=*/4096);
+ header_block.header()->magic = param.existing_flash_index_magic.value();
+
+ std::string main_index_dir = index_path_ + "/idx/main";
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(main_index_dir.c_str()));
+ std::string flash_index_file_path = main_index_dir + "/main_index";
+
+ ScopedFd sfd(filesystem_.OpenForWrite(flash_index_file_path.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ ASSERT_TRUE(header_block.Write(sfd.get()));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ VersionInfo version_info,
+ ReadVersion(filesystem_, version_file_path_, index_path_));
+ EXPECT_THAT(version_info, Eq(param.expected_version_info));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ VersionUtilReadVersionTest, VersionUtilReadVersionTest,
+ testing::Values(
+ // - Version file doesn't exist
+ // - Flash index doesn't exist
+ // - Result: version -1, max_version -1 (invalid)
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::nullopt,
+ /*existing_flash_index_magic_in=*/std::nullopt,
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/-1, /*max_version=*/-1)),
+
+ // - Version file doesn't exist
+ // - Flash index exists with version 0 magic
+ // - Result: version 0, max_version 0
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::nullopt,
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/0, /*max_version=*/0)),
+
+ // - Version file doesn't exist
+ // - Flash index exists with non version 0 magic
+ // - Result: version -1, max_version -1 (invalid)
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::nullopt,
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/-1, /*max_version=*/-1)),
+
+ // - Version file exists
+ // - Flash index doesn't exist
+ // - Result: version -1, max_version 1 (invalid)
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/1, /*max_version=*/1),
+ /*existing_flash_index_magic_in=*/std::nullopt,
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/-1, /*max_version=*/1)),
+
+ // - Version file exists: version 1, max_version 1
+ // - Flash index exists with version 0 magic
+ // - Result: version 0, max_version 1
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/1, /*max_version=*/1),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/0, /*max_version=*/1)),
+
+ // - Version file exists: version 2, max_version 3
+ // - Flash index exists with version 0 magic
+ // - Result: version 0, max_version 3
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/2, /*max_version=*/3),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/0, /*max_version=*/3)),
+
+ // - Version file exists: version 1, max_version 1
+ // - Flash index exists with non version 0 magic
+ // - Result: version 1, max_version 1
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/1, /*max_version=*/1),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/1, /*max_version=*/1)),
+
+ // - Version file exists: version 2, max_version 3
+ // - Flash index exists with non version 0 magic
+ // - Result: version 2, max_version 3
+ VersionUtilReadVersionTestParam(
+ /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/2, /*max_version=*/3),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/2, /*max_version=*/3))));
+
+struct VersionUtilStateChangeTestParam {
+ VersionInfo existing_version_info;
+ int32_t curr_version;
+ StateChange expected_state_change;
+
+ explicit VersionUtilStateChangeTestParam(VersionInfo existing_version_info_in,
+ int32_t curr_version_in,
+ StateChange expected_state_change_in)
+ : existing_version_info(std::move(existing_version_info_in)),
+ curr_version(curr_version_in),
+ expected_state_change(expected_state_change_in) {}
+};
+
+class VersionUtilStateChangeTest
+ : public ::testing::TestWithParam<VersionUtilStateChangeTestParam> {};
+
+TEST_P(VersionUtilStateChangeTest, GetVersionStateChange) {
+ const VersionUtilStateChangeTestParam& param = GetParam();
+
+ EXPECT_THAT(
+ GetVersionStateChange(param.existing_version_info, param.curr_version),
+ Eq(param.expected_state_change));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ VersionUtilStateChangeTest, VersionUtilStateChangeTest,
+ testing::Values(
+ // - version -1, max_version -1 (invalid)
+ // - Current version = 1
+ // - Result: undetermined
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(-1, -1),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kUndetermined),
+
+ // - version -1, max_version 1 (invalid)
+ // - Current version = 1
+ // - Result: undetermined
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(-1, 1),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kUndetermined),
+
+ // - version -1, max_version -1 (invalid)
+ // - Current version = 2
+ // - Result: undetermined
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(-1, -1),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kUndetermined),
+
+ // - version -1, max_version 1 (invalid)
+ // - Current version = 2
+ // - Result: undetermined
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(-1, 1),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kUndetermined),
+
+ // - version 0, max_version 0
+ // - Current version = 1
+ // - Result: version 0 upgrade
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 0),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kVersionZeroUpgrade),
+
+ // - version 0, max_version 1
+ // - Current version = 1
+ // - Result: version 0 roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 1),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kVersionZeroRollForward),
+
+ // - version 0, max_version 2
+ // - Current version = 1
+ // - Result: version 0 roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 2),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kVersionZeroRollForward),
+
+ // - version 0, max_version 0
+ // - Current version = 2
+ // - Result: version 0 upgrade
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 0),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kVersionZeroUpgrade),
+
+ // - version 0, max_version 1
+ // - Current version = 2
+ // - Result: version 0 roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 1),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kVersionZeroRollForward),
+
+ // - version 0, max_version 2
+ // - Current version = 2
+ // - Result: version 0 roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(0, 2),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kVersionZeroRollForward),
+
+ // - version 1, max_version 1
+ // - Current version = 1
+ // - Result: compatible
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 1),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kCompatible),
+
+ // - version 1, max_version 2
+ // - Current version = 1
+ // - Result: compatible
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 2),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kCompatible),
+
+ // - version 2, max_version 2
+ // - Current version = 1
+ // - Result: roll back
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(2, 2),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kRollBack),
+
+ // - version 2, max_version 3
+ // - Current version = 1
+ // - Result: roll back
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(2, 3),
+ /*curr_version_in=*/1,
+ /*expected_state_change_in=*/StateChange::kRollBack),
+
+ // - version 1, max_version 1
+ // - Current version = 2
+ // - Result: upgrade
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 1),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kUpgrade),
+
+ // - version 1, max_version 2
+ // - Current version = 2
+ // - Result: roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 2),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kRollForward),
+
+ // - version 1, max_version 3
+ // - Current version = 2
+ // - Result: roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 3),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kRollForward),
+
+ // - version 2, max_version 2
+ // - Current version = 2
+ // - Result: compatible
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(2, 2),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kCompatible),
+
+ // - version 2, max_version 3
+ // - Current version = 2
+ // - Result: compatible
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(2, 3),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kCompatible),
+
+ // - version 3, max_version 3
+ // - Current version = 2
+ // - Result: rollback
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(3, 3),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kRollBack),
+
+ // - version 3, max_version 4
+ // - Current version = 2
+ // - Result: rollback
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(3, 4),
+ /*curr_version_in=*/2,
+ /*expected_state_change_in=*/StateChange::kRollBack)));
+
+} // namespace
+
+} // namespace version_util
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 56c7795..e7b6ae9 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -32,6 +32,7 @@
#include "icing/file/destructible-file.h"
#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
+#include "icing/file/version-util.h"
#include "icing/index/data-indexing-handler.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/index-processor.h"
@@ -41,7 +42,7 @@
#include "icing/index/numeric/integer-index.h"
#include "icing/index/string-section-indexing-handler.h"
#include "icing/join/join-processor.h"
-#include "icing/join/qualified-id-joinable-property-indexing-handler.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/join/qualified-id-type-joinable-index.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/portable/endian.h"
@@ -96,6 +97,7 @@ namespace lib {
namespace {
+constexpr std::string_view kVersionFilename = "version";
constexpr std::string_view kDocumentSubfolderName = "document_dir";
constexpr std::string_view kIndexSubfolderName = "index_dir";
constexpr std::string_view kIntegerIndexSubfolderName = "integer_index_dir";
@@ -216,6 +218,12 @@ libtextclassifier3::Status ValidateSuggestionSpec(
return libtextclassifier3::Status::OK;
}
+// Returns the path of the version file: a single file named kVersionFilename
+// directly under base_dir that holds the version info of the existing data.
+std::string MakeVersionFilePath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kVersionFilename);
+}
+
// Document store files are in a standalone subfolder for easier file
// management. We can delete and recreate the subfolder and not touch/affect
// anything else.
@@ -454,20 +462,34 @@ libtextclassifier3::Status IcingSearchEngine::CheckInitMarkerFile(
// fails, then just assume the value is zero (the most likely reason for
// failure would be non-existence because the last init was successful
// anyways).
- ScopedFd marker_file_fd(filesystem_->OpenForWrite(marker_filepath.c_str()));
+ std::unique_ptr<ScopedFd> marker_file_fd = std::make_unique<ScopedFd>(
+ filesystem_->OpenForWrite(marker_filepath.c_str()));
libtextclassifier3::Status status;
if (file_exists &&
- filesystem_->PRead(marker_file_fd.get(), &network_init_attempts,
+ filesystem_->PRead(marker_file_fd->get(), &network_init_attempts,
sizeof(network_init_attempts), /*offset=*/0)) {
host_init_attempts = GNetworkToHostL(network_init_attempts);
if (host_init_attempts > kMaxUnsuccessfulInitAttempts) {
// We've tried and failed to init too many times. We need to throw
// everything out and start from scratch.
ResetMembers();
+ marker_file_fd.reset();
+
+ // Delete the entire base directory.
if (!filesystem_->DeleteDirectoryRecursively(
options_.base_dir().c_str())) {
return absl_ports::InternalError("Failed to delete icing base dir!");
}
+
+ // Create the base directory again and reopen marker file.
+ if (!filesystem_->CreateDirectoryRecursively(
+ options_.base_dir().c_str())) {
+ return absl_ports::InternalError("Failed to create icing base dir!");
+ }
+
+ marker_file_fd = std::make_unique<ScopedFd>(
+ filesystem_->OpenForWrite(marker_filepath.c_str()));
+
status = absl_ports::DataLossError(
"Encountered failed initialization limit. Cleared all data.");
host_init_attempts = 0;
@@ -482,10 +504,10 @@ libtextclassifier3::Status IcingSearchEngine::CheckInitMarkerFile(
++host_init_attempts;
network_init_attempts = GHostToNetworkL(host_init_attempts);
// Write the updated number of attempts before we get started.
- if (!filesystem_->PWrite(marker_file_fd.get(), /*offset=*/0,
+ if (!filesystem_->PWrite(marker_file_fd->get(), /*offset=*/0,
&network_init_attempts,
sizeof(network_init_attempts)) ||
- !filesystem_->DataSync(marker_file_fd.get())) {
+ !filesystem_->DataSync(marker_file_fd->get())) {
return absl_ports::InternalError(
"Failed to write and sync init marker file");
}
@@ -547,6 +569,31 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
return status;
}
+ // Read version file and determine the state change.
+ const std::string version_filepath = MakeVersionFilePath(options_.base_dir());
+ const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
+ ICING_ASSIGN_OR_RETURN(
+ version_util::VersionInfo version_info,
+ version_util::ReadVersion(*filesystem_, version_filepath, index_dir));
+ version_util::StateChange version_state_change =
+ version_util::GetVersionStateChange(version_info);
+ if (version_state_change != version_util::StateChange::kCompatible) {
+ // Step 1: migrate schema according to the version state change.
+ ICING_RETURN_IF_ERROR(SchemaStore::MigrateSchema(
+ filesystem_.get(), MakeSchemaDirectoryPath(options_.base_dir()),
+ version_state_change, version_util::kVersion));
+
+ // Step 2: discard all derived data
+ ICING_RETURN_IF_ERROR(DiscardDerivedFiles());
+
+ // Step 3: update version file
+ version_util::VersionInfo new_version_info(
+ version_util::kVersion,
+ std::max(version_info.max_version, version_util::kVersion));
+ ICING_RETURN_IF_ERROR(version_util::WriteVersion(
+ *filesystem_, version_filepath, new_version_info));
+ }
+
ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats));
// TODO(b/156383798) : Resolve how to specify the locale.
@@ -567,7 +614,6 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
// and index directories and initialize them from scratch.
const std::string doc_store_dir =
MakeDocumentDirectoryPath(options_.base_dir());
- const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
const std::string integer_index_dir =
MakeIntegerIndexWorkingPath(options_.base_dir());
const std::string qualified_id_join_index_dir =
@@ -597,7 +643,6 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
// We're going to need to build the index from scratch. So just delete its
// directory now.
// Discard index directory and instantiate a new one.
- const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
Index::Options index_options(index_dir, options_.index_merge_size());
if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
!filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
@@ -649,6 +694,24 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
initialize_stats->set_qualified_id_join_index_restoration_cause(
InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
+ } else if (version_state_change != version_util::StateChange::kCompatible) {
+ ICING_RETURN_IF_ERROR(InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/true, initialize_stats));
+ index_init_status = InitializeIndex(initialize_stats);
+ if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
+ return index_init_status;
+ }
+
+ initialize_stats->set_schema_store_recovery_cause(
+ InitializeStatsProto::VERSION_CHANGED);
+ initialize_stats->set_document_store_recovery_cause(
+ InitializeStatsProto::VERSION_CHANGED);
+ initialize_stats->set_index_restoration_cause(
+ InitializeStatsProto::VERSION_CHANGED);
+ initialize_stats->set_integer_index_restoration_cause(
+ InitializeStatsProto::VERSION_CHANGED);
+ initialize_stats->set_qualified_id_join_index_restoration_cause(
+ InitializeStatsProto::VERSION_CHANGED);
} else {
ICING_RETURN_IF_ERROR(InitializeDocumentStore(
/*force_recovery_and_revalidate_documents=*/false, initialize_stats));
@@ -861,7 +924,8 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
DestructibleFile marker_file(marker_filepath, filesystem_.get());
auto set_schema_result_or = schema_store_->SetSchema(
- std::move(new_schema), ignore_errors_and_delete_documents);
+ std::move(new_schema), ignore_errors_and_delete_documents,
+ options_.allow_circular_schema_definitions());
if (!set_schema_result_or.ok()) {
TransformStatus(set_schema_result_or.status(), result_status);
return result_proto;
@@ -1123,12 +1187,13 @@ GetResultProto IcingSearchEngine::Get(const std::string_view name_space,
DocumentProto document = std::move(document_or).ValueOrDie();
std::unique_ptr<ProjectionTree> type_projection_tree;
std::unique_ptr<ProjectionTree> wildcard_projection_tree;
- for (const TypePropertyMask& type_field_mask :
- result_spec.type_property_masks()) {
- if (type_field_mask.schema_type() == document.schema()) {
+ for (const SchemaStore::ExpandedTypePropertyMask& type_field_mask :
+ schema_store_->ExpandTypePropertyMasks(
+ result_spec.type_property_masks())) {
+ if (type_field_mask.schema_type == document.schema()) {
type_projection_tree = std::make_unique<ProjectionTree>(type_field_mask);
- } else if (type_field_mask.schema_type() ==
- ProjectionTree::kSchemaTypeWildcard) {
+ } else if (type_field_mask.schema_type ==
+ SchemaStore::kSchemaTypeWildcard) {
wildcard_projection_tree =
std::make_unique<ProjectionTree>(type_field_mask);
}
@@ -1817,7 +1882,7 @@ SearchResultProto IcingSearchEngine::Search(
child_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>(
join_spec.nested_spec().search_spec(),
join_spec.nested_spec().scoring_spec(),
- join_spec.nested_spec().result_spec(),
+ join_spec.nested_spec().result_spec(), schema_store_.get(),
std::move(nested_query_scoring_results.query_terms));
}
@@ -1847,7 +1912,7 @@ SearchResultProto IcingSearchEngine::Search(
// Construct parent's result adjustment info.
auto parent_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>(
- search_spec, scoring_spec, result_spec,
+ search_spec, scoring_spec, result_spec, schema_store_.get(),
std::move(query_scoring_results.query_terms));
std::unique_ptr<ScoredDocumentHitsRanker> ranker;
@@ -2352,11 +2417,10 @@ IcingSearchEngine::CreateDataIndexingHandlers() {
handlers.push_back(std::move(integer_section_indexing_handler));
// Qualified id joinable property index handler
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>
- qualified_id_joinable_property_indexing_handler,
- QualifiedIdJoinablePropertyIndexingHandler::Create(
- clock_.get(), qualified_id_join_index_.get()));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_joinable_property_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(
+ clock_.get(), qualified_id_join_index_.get()));
handlers.push_back(
std::move(qualified_id_joinable_property_indexing_handler));
@@ -2454,6 +2518,44 @@ IcingSearchEngine::TruncateIndicesTo(DocumentId last_stored_document_id) {
qualified_id_join_index_needed_restoration);
}
+libtextclassifier3::Status IcingSearchEngine::DiscardDerivedFiles() {
+ if (schema_store_ != nullptr || document_store_ != nullptr ||
+ index_ != nullptr || integer_index_ != nullptr ||
+ qualified_id_join_index_ != nullptr) {  // Refuse while any instance exists.
+ return absl_ports::FailedPreconditionError(
+ "Cannot discard derived files while having valid instances");
+ }
+
+ // Schema store: delegates to SchemaStore::DiscardDerivedFiles().
+ ICING_RETURN_IF_ERROR(
+ SchemaStore::DiscardDerivedFiles(filesystem_.get(), options_.base_dir()));
+
+ // Document store: delegates to DocumentStore::DiscardDerivedFiles().
+ ICING_RETURN_IF_ERROR(DocumentStore::DiscardDerivedFiles(
+ filesystem_.get(), options_.base_dir()));
+
+ // Term index: the whole index directory is deleted.
+ if (!filesystem_->DeleteDirectoryRecursively(
+ MakeIndexDirectoryPath(options_.base_dir()).c_str())) {
+ return absl_ports::InternalError("Failed to discard index");
+ }
+
+ // Integer index: the whole working path is deleted.
+ if (!filesystem_->DeleteDirectoryRecursively(
+ MakeIntegerIndexWorkingPath(options_.base_dir()).c_str())) {
+ return absl_ports::InternalError("Failed to discard integer index");
+ }
+
+ // Qualified id join index: the whole working path is deleted.
+ if (!filesystem_->DeleteDirectoryRecursively(
+ MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir()).c_str())) {
+ return absl_ports::InternalError(
+ "Failed to discard qualified id join index");
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
libtextclassifier3::Status IcingSearchEngine::ClearSearchIndices() {
ICING_RETURN_IF_ERROR(index_->Reset());
ICING_RETURN_IF_ERROR(integer_index_->Clear());
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index 3e85f69..4192169 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -614,6 +614,15 @@ class IcingSearchEngine {
libtextclassifier3::Status CheckConsistency()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ // Discards all derived data of the schema store, document store, term index,
+ // integer index and qualified id join index under the base directory.
+ // Returns:
+ // OK on success
+ // FAILED_PRECONDITION_ERROR if any of those instances is non-nullptr
+ // INTERNAL_ERROR on any I/O errors
+ libtextclassifier3::Status DiscardDerivedFiles()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
// Repopulates derived data off our ground truths.
//
// Returns:
diff --git a/icing/icing-search-engine_backwards_compatibility_test.cc b/icing/icing-search-engine_backwards_compatibility_test.cc
index 848c347..178e923 100644
--- a/icing/icing-search-engine_backwards_compatibility_test.cc
+++ b/icing/icing-search-engine_backwards_compatibility_test.cc
@@ -118,16 +118,17 @@ TEST_F(IcingSearchEngineBackwardsCompatibilityTest,
IcingSearchEngine icing(icing_options, GetTestJniCache());
InitializeResultProto init_result = icing.Initialize();
EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+ // Since there will be a version change, the recovery cause will be
+ // VERSION_CHANGED.
EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
Eq(InitializeStatsProto::NO_DATA_LOSS));
EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
- Eq(InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT));
+ Eq(InitializeStatsProto::VERSION_CHANGED));
EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- // The main and lite indexes are in legacy formats and therefore will need to
- // be rebuilt from scratch.
+ Eq(InitializeStatsProto::VERSION_CHANGED));
EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::IO_ERROR));
+ Eq(InitializeStatsProto::VERSION_CHANGED));
// Set up schema, this is the one used to validate documents in the testdata
// files. Do not change unless you're also updating the testdata files.
@@ -257,17 +258,17 @@ TEST_F(IcingSearchEngineBackwardsCompatibilityTest, MigrateToLargerScale) {
IcingSearchEngine icing(icing_options, GetTestJniCache());
InitializeResultProto init_result = icing.Initialize();
EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+ // Since there will be a version change, the recovery cause will be
+ // VERSION_CHANGED.
EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
Eq(InitializeStatsProto::NO_DATA_LOSS));
- // No recovery is required for the document store.
EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
+ Eq(InitializeStatsProto::VERSION_CHANGED));
EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- // The main and lite indexes are in legacy formats and therefore will need to
- // be rebuilt from scratch.
+ Eq(InitializeStatsProto::VERSION_CHANGED));
EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::IO_ERROR));
+ Eq(InitializeStatsProto::VERSION_CHANGED));
// Verify that the schema stored in the index matches the one that we expect.
// Do not change unless you're also updating the testdata files.
@@ -404,18 +405,19 @@ TEST_F(IcingSearchEngineBackwardsCompatibilityTest,
IcingSearchEngine icing(icing_options, GetTestJniCache());
InitializeResultProto init_result = icing.Initialize();
EXPECT_THAT(init_result.status(), ProtoIsOk());
+
+ // Since there will be a version change, the recovery cause will be
+ // VERSION_CHANGED.
EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
Eq(InitializeStatsProto::NO_DATA_LOSS));
- // No recovery is required for the document store.
EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
+ Eq(InitializeStatsProto::VERSION_CHANGED));
// TODO: create enum code for legacy schema store recovery after schema store
// change is made.
EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
- Eq(InitializeStatsProto::NONE));
- // No recovery is required for the index.
+ Eq(InitializeStatsProto::VERSION_CHANGED));
EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::NONE));
+ Eq(InitializeStatsProto::VERSION_CHANGED));
// Verify that the schema stored in the index matches the one that we expect.
// Do not change unless you're also updating the testdata files.
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index cf654a8..fb44595 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -1164,7 +1164,6 @@ void BM_JoinQueryQualifiedId(benchmark::State& state) {
// JoinSpec
JoinSpecProto* join_spec = search_spec.mutable_join_spec();
- join_spec->set_max_joined_child_count(std::numeric_limits<int32_t>::max());
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("personQualifiedId");
@@ -1181,6 +1180,8 @@ void BM_JoinQueryQualifiedId(benchmark::State& state) {
static constexpr int kNumPerPage = 10;
ResultSpecProto result_spec;
result_spec.set_num_per_page(kNumPerPage);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
ScoringSpecProto score_spec = ScoringSpecProto::default_instance();
diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc
index 0db4d54..13a2dc3 100644
--- a/icing/icing-search-engine_initialization_test.cc
+++ b/icing/icing-search-engine_initialization_test.cc
@@ -24,13 +24,19 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/file/mock-filesystem.h"
+#include "icing/file/version-util.h"
#include "icing/icing-search-engine.h"
+#include "icing/index/index-processor.h"
#include "icing/index/index.h"
+#include "icing/index/integer-section-indexing-handler.h"
#include "icing/index/numeric/integer-index.h"
+#include "icing/index/string-section-indexing-handler.h"
#include "icing/jni/jni-cache.h"
#include "icing/join/doc-join-info.h"
#include "icing/join/join-processor.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/join/qualified-id-type-joinable-index.h"
+#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
#include "icing/portable/endian.h"
#include "icing/portable/equals-proto.h"
@@ -61,6 +67,12 @@
#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer-factory.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/tokenized-document.h"
+#include "unicode/uloc.h"
namespace icing {
namespace lib {
@@ -144,21 +156,39 @@ class IcingSearchEngineInitializationTest : public testing::Test {
icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
}
filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+
+ language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ normalizer_,
+ normalizer_factory::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
}
void TearDown() override {
+ normalizer_.reset();
+ lang_segmenter_.reset();
filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
}
const Filesystem* filesystem() const { return &filesystem_; }
- private:
+ const IcingFilesystem* icing_filesystem() const { return &icing_filesystem_; }
+
Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
};
// Non-zero value so we don't override it to be the current time
constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
+std::string GetVersionFilename() { return GetTestBaseDir() + "/version"; }  // Version file path under the test base dir.
+
std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; }
std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; }
@@ -869,7 +899,9 @@ TEST_F(IcingSearchEngineInitializationTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
- ICING_EXPECT_OK(schema_store->SetSchema(new_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ new_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
} // Will persist new schema
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
@@ -1024,7 +1056,6 @@ TEST_F(IcingSearchEngineInitializationTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
- ICING_EXPECT_OK(schema_store->SetSchema(CreateMessageSchema()));
// Puts message2 into DocumentStore but doesn't index it.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1137,7 +1168,6 @@ TEST_F(IcingSearchEngineInitializationTest,
search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec3.set_query("name:person");
JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -1151,6 +1181,10 @@ TEST_F(IcingSearchEngineInitializationTest,
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
SearchResultProto expected_join_search_result_proto;
expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
SearchResultProto::ResultProto* result_proto =
@@ -1159,9 +1193,8 @@ TEST_F(IcingSearchEngineInitializationTest,
*result_proto->mutable_joined_results()->Add()->mutable_document() = message2;
*result_proto->mutable_joined_results()->Add()->mutable_document() = message1;
- SearchResultProto search_result_proto3 =
- icing.Search(search_spec3, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
+ SearchResultProto search_result_proto3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
expected_join_search_result_proto));
}
@@ -1527,7 +1560,6 @@ TEST_F(IcingSearchEngineInitializationTest,
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec.set_query("name:person");
JoinSpecProto* join_spec = search_spec.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -1541,6 +1573,10 @@ TEST_F(IcingSearchEngineInitializationTest,
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
SearchResultProto::ResultProto* result_proto =
@@ -1559,8 +1595,7 @@ TEST_F(IcingSearchEngineInitializationTest,
EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
@@ -1619,24 +1654,26 @@ TEST_F(IcingSearchEngineInitializationTest,
// Check that our index is ok by searching over the restored index
SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
}
TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
- // Test the following scenario: losing the entire term index directory.
+ // Test the following scenario: losing the entire term index. Since we need
+ // flash index magic to determine the version, in this test we will throw out
+ // the entire term index and re-initialize an empty one, to bypass
+ // undetermined version state change and correctly trigger "lose term index"
+ // scenario.
// IcingSearchEngine should be able to recover term index. Several additional
// behaviors are also tested:
// - Index directory handling:
- // - Term index directory should not be discarded since we've already lost
- // it. Start it from scratch.
+ // - Term index directory should not be discarded (it is instead just
+ // rebuilt by replaying all docs).
// - Integer index directory should be unaffected.
// - Qualified id join index directory should be unaffected.
// - Truncate indices:
- // - "TruncateTo()" for term index shouldn't take effect since we start it
- // from scratch.
+ // - "TruncateTo()" for term index shouldn't take effect since it is empty.
// - "Clear()" shouldn't be called for integer index, i.e. no integer index
// storage sub directories (path_expr = "*/integer_index_dir/*") should be
// discarded.
@@ -1704,9 +1741,18 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
}
- // 2. Delete the term index directory to trigger RestoreIndexIfNeeded.
- std::string idx_dir = GetIndexDir();
- filesystem()->DeleteDirectoryRecursively(idx_dir.c_str());
+ // 2. Delete and re-initialize an empty term index to trigger
+ // RestoreIndexIfNeeded.
+ {
+ std::string idx_subdir = GetIndexDir() + "/idx";
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(Index::Options(GetIndexDir(),
+ /*index_merge_size=*/100),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK(index->PersistToDisk());
+ }
// 3. Create the index again. This should trigger index restoration.
{
@@ -1791,7 +1837,6 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec3.set_query("name:person");
JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -1805,9 +1850,12 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
- SearchResultProto results3 =
- icing.Search(search_spec3, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
ASSERT_THAT(results3.results(), SizeIs(1));
EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
@@ -1985,7 +2033,6 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec3.set_query("name:person");
JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -1999,9 +2046,12 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
- SearchResultProto results3 =
- icing.Search(search_spec3, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
ASSERT_THAT(results3.results(), SizeIs(1));
EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
@@ -2181,7 +2231,6 @@ TEST_F(IcingSearchEngineInitializationTest,
search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec3.set_query("name:person");
JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -2195,9 +2244,12 @@ TEST_F(IcingSearchEngineInitializationTest,
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
- SearchResultProto results3 =
- icing.Search(search_spec3, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
ASSERT_THAT(results3.results(), SizeIs(1));
EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
@@ -2303,14 +2355,12 @@ TEST_F(IcingSearchEngineInitializationTest,
// - Integer index: [0, 1, 2]
// - Qualified id join index: [0, 1, 2]
{
- Filesystem filesystem;
- IcingFilesystem icing_filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
/*index_merge_size=*/message.ByteSizeLong()),
- &filesystem, &icing_filesystem));
+ filesystem(), icing_filesystem()));
DocumentId original_last_added_doc_id = index->last_added_document_id();
index->set_last_added_document_id(original_last_added_doc_id + 1);
Index::Editor editor =
@@ -2405,7 +2455,6 @@ TEST_F(IcingSearchEngineInitializationTest,
search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec3.set_query("name:person");
JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -2419,9 +2468,12 @@ TEST_F(IcingSearchEngineInitializationTest,
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
- SearchResultProto results3 =
- icing.Search(search_spec3, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
ASSERT_THAT(results3.results(), SizeIs(1));
EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
EXPECT_THAT(results3.results(0).joined_results(), SizeIs(2));
@@ -2435,14 +2487,12 @@ TEST_F(IcingSearchEngineInitializationTest,
// verify the correctness of term index restoration. Instead, we have to check
// hits for "foo" should not be found in term index.
{
- Filesystem filesystem;
- IcingFilesystem icing_filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
/*index_merge_size=*/message.ByteSizeLong()),
- &filesystem, &icing_filesystem));
+ filesystem(), icing_filesystem()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
index->GetIterator("foo", /*term_start_index=*/0,
@@ -2549,14 +2599,12 @@ TEST_F(IcingSearchEngineInitializationTest,
// - Integer index: [0, 1, 2, 3]
// - Qualified id join index: [0, 1, 2, 3]
{
- Filesystem filesystem;
- IcingFilesystem icing_filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
/*index_merge_size=*/message.ByteSizeLong()),
- &filesystem, &icing_filesystem));
+ filesystem(), icing_filesystem()));
DocumentId original_last_added_doc_id = index->last_added_document_id();
index->set_last_added_document_id(original_last_added_doc_id + 1);
Index::Editor editor =
@@ -2654,7 +2702,6 @@ TEST_F(IcingSearchEngineInitializationTest,
search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec3.set_query("name:person");
JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -2668,9 +2715,12 @@ TEST_F(IcingSearchEngineInitializationTest,
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
- SearchResultProto results3 =
- icing.Search(search_spec3, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
ASSERT_THAT(results3.results(), SizeIs(1));
EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
@@ -2686,14 +2736,12 @@ TEST_F(IcingSearchEngineInitializationTest,
// verify the correctness of term index restoration. Instead, we have to check
// hits for "foo" should not be found in term index.
{
- Filesystem filesystem;
- IcingFilesystem icing_filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
/*index_merge_size=*/message.ByteSizeLong()),
- &filesystem, &icing_filesystem));
+ filesystem(), icing_filesystem()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
index->GetIterator("foo", /*term_start_index=*/0,
@@ -2747,15 +2795,13 @@ TEST_F(IcingSearchEngineInitializationTest,
// - Integer index: []
// - Qualified id join index: []
{
- Filesystem filesystem;
- IcingFilesystem icing_filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Index> index,
Index::Create(
// index merge size is not important here because we will manually
// invoke merge below.
Index::Options(GetIndexDir(), /*index_merge_size=*/100),
- &filesystem, &icing_filesystem));
+ filesystem(), icing_filesystem()));
// Add hits for document 0 and merge.
ASSERT_THAT(index->last_added_document_id(), kInvalidDocumentId);
index->set_last_added_document_id(0);
@@ -2828,12 +2874,10 @@ TEST_F(IcingSearchEngineInitializationTest,
// enough to verify the correctness of term index restoration. Instead, we
// have to check hits for "foo", "bar" should not be found in term index.
{
- Filesystem filesystem;
- IcingFilesystem icing_filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Index> index,
Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100),
- &filesystem, &icing_filesystem));
+ filesystem(), icing_filesystem()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
index->GetIterator("foo", /*term_start_index=*/0,
@@ -2944,14 +2988,12 @@ TEST_F(IcingSearchEngineInitializationTest,
// - Integer index: [0, 1, 2, 3]
// - Qualified id join index: [0, 1, 2, 3]
{
- Filesystem filesystem;
- IcingFilesystem icing_filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Index> index,
Index::Create(
Index::Options(GetIndexDir(),
/*index_merge_size=*/message.ByteSizeLong()),
- &filesystem, &icing_filesystem));
+ filesystem(), icing_filesystem()));
// Add hits for document 4 and merge.
DocumentId original_last_added_doc_id = index->last_added_document_id();
index->set_last_added_document_id(original_last_added_doc_id + 1);
@@ -3057,7 +3099,6 @@ TEST_F(IcingSearchEngineInitializationTest,
search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec3.set_query("name:person");
JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -3071,9 +3112,12 @@ TEST_F(IcingSearchEngineInitializationTest,
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
- SearchResultProto results3 =
- icing.Search(search_spec3, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
ASSERT_THAT(results3.results(), SizeIs(1));
EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
@@ -3089,12 +3133,10 @@ TEST_F(IcingSearchEngineInitializationTest,
// enough to verify the correctness of term index restoration. Instead, we
// have to check hits for "foo", "bar" should not be found in term index.
{
- Filesystem filesystem;
- IcingFilesystem icing_filesystem;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Index> index,
Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100),
- &filesystem, &icing_filesystem));
+ filesystem(), icing_filesystem()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
index->GetIterator("foo", /*term_start_index=*/0,
@@ -3426,7 +3468,6 @@ TEST_F(IcingSearchEngineInitializationTest,
search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec3.set_query("name:person");
JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -3440,9 +3481,12 @@ TEST_F(IcingSearchEngineInitializationTest,
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
- SearchResultProto results3 =
- icing.Search(search_spec3, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
ASSERT_THAT(results3.results(), SizeIs(1));
EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
@@ -3808,7 +3852,6 @@ TEST_F(IcingSearchEngineInitializationTest,
search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec3.set_query("name:person");
JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -3822,9 +3865,12 @@ TEST_F(IcingSearchEngineInitializationTest,
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
- SearchResultProto results3 =
- icing.Search(search_spec3, ScoringSpecProto::default_instance(),
- ResultSpecProto::default_instance());
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto results3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
ASSERT_THAT(results3.results(), SizeIs(1));
EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
@@ -4249,9 +4295,16 @@ TEST_F(IcingSearchEngineInitializationTest,
}
{
- // Delete the index file to trigger RestoreIndexIfNeeded.
+ // Delete and re-initialize an empty index file to trigger
+ // RestoreIndexIfNeeded.
std::string idx_subdir = GetIndexDir() + "/idx";
- filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(Index::Options(GetIndexDir(),
+ /*index_merge_size=*/100),
+ filesystem(), icing_filesystem()));
+ ICING_ASSERT_OK(index->PersistToDisk());
}
{
@@ -4501,7 +4554,9 @@ TEST_F(IcingSearchEngineInitializationTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
- ICING_EXPECT_OK(schema_store->SetSchema(new_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ new_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
{
@@ -4890,10 +4945,11 @@ TEST_F(IcingSearchEngineInitializationTest,
}
{
- // Delete the schema store header file to trigger an I/O error.
+ // Delete the schema store type mapper to trigger an I/O error.
std::string schema_store_header_file_path =
- GetSchemaDir() + "/schema_store_header";
- filesystem()->DeleteFile(schema_store_header_file_path.c_str());
+ GetSchemaDir() + "/schema_type_mapper";
+ ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(
+ schema_store_header_file_path.c_str()));
}
{
@@ -4974,6 +5030,303 @@ TEST_F(IcingSearchEngineInitializationTest,
}
}
+class IcingSearchEngineInitializationVersionChangeTest
+ : public IcingSearchEngineInitializationTest,
+ public ::testing::WithParamInterface<version_util::VersionInfo> {};
+
+TEST_P(IcingSearchEngineInitializationVersionChangeTest,
+ RecoverFromVersionChange) {
+ // TODO(b/280697513): test backup schema migration
+ // Test the following scenario: version change. All derived data should be
+ // rebuilt. We test this by manually adding some invalid derived data and
+ // verifying they're removed due to rebuild.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("senderQualifiedId")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto person1 =
+ DocumentBuilder()
+ .SetKey("namespace", "person/1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto person2 =
+ DocumentBuilder()
+ .SetKey("namespace", "person/2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "person")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message")
+ .SetSchema("Message")
+ .AddStringProperty("body", "correct message")
+ .AddInt64Property("indexableInteger", 123)
+ .AddStringProperty("senderQualifiedId", "namespace#person/1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ {
+ // Initializes folder and schema, index person1 and person2
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk());
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ {
+ // Manually:
+ // - Put message into DocumentStore
+ // - But add some incorrect data for message into 3 indices
+ // - Change version file
+ //
+ // These will make sure last_added_document_id is consistent with
+ // last_stored_document_id, so if Icing didn't handle version change
+ // correctly, then the index won't be rebuilt.
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+
+ // Put message into DocumentStore
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock,
+ schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, document_store->Put(message));
+
+ // Index doc_id with incorrect data
+ Index::Options options(GetIndexDir(), /*index_merge_size=*/1024 * 1024);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Index> index,
+ Index::Create(options, filesystem(), icing_filesystem()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(*filesystem(), GetIntegerIndexDir()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index,
+ QualifiedIdTypeJoinableIndex::Create(*filesystem(),
+ GetQualifiedIdJoinIndexDir()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<StringSectionIndexingHandler>
+ string_section_indexing_handler,
+ StringSectionIndexingHandler::Create(&fake_clock, normalizer_.get(),
+ index.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
+ integer_section_indexing_handler,
+ IntegerSectionIndexingHandler::Create(
+ &fake_clock, integer_index.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
+ qualified_id_joinable_property_indexing_handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock,
+ qualified_id_join_index.get()));
+ std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
+ handlers.push_back(std::move(string_section_indexing_handler));
+ handlers.push_back(std::move(integer_section_indexing_handler));
+ handlers.push_back(
+ std::move(qualified_id_joinable_property_indexing_handler));
+ IndexProcessor index_processor(std::move(handlers), &fake_clock);
+
+ DocumentProto incorrect_message =
+ DocumentBuilder()
+ .SetKey("namespace", "message")
+ .SetSchema("Message")
+ .AddStringProperty("body", "wrong message")
+ .AddInt64Property("indexableInteger", 456)
+ .AddStringProperty("senderQualifiedId", "namespace#person/2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store.get(), lang_segmenter_.get(),
+ std::move(incorrect_message)));
+ ICING_ASSERT_OK(index_processor.IndexDocument(tokenized_document, doc_id));
+
+ // Change existing data's version file
+ const version_util::VersionInfo& existing_version_info = GetParam();
+ ICING_ASSERT_OK(version_util::WriteVersion(
+ *filesystem(), GetVersionFilename(), existing_version_info));
+ }
+
+ // Re-initialize Icing on the existing data with real (non-mock) filesystems.
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ EXPECT_THAT(initialize_result.status(), ProtoIsOk());
+ // Index Restoration should be triggered here. Incorrect data should be
+ // deleted and correct data of message should be indexed.
+ EXPECT_THAT(
+ initialize_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::VERSION_CHANGED));
+
+ // Manually check version file
+ ICING_ASSERT_OK_AND_ASSIGN(
+ version_util::VersionInfo version_info_after_init,
+ version_util::ReadVersion(*filesystem(), GetVersionFilename(),
+ GetIndexDir()));
+ EXPECT_THAT(version_info_after_init.version, Eq(version_util::kVersion));
+ EXPECT_THAT(version_info_after_init.max_version,
+ Eq(std::max(version_util::kVersion, GetParam().max_version)));
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ message;
+
+ // Verify term search
+ SearchSpecProto search_spec1;
+ search_spec1.set_query("body:correct");
+ search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto search_result_proto1 =
+ icing.Search(search_spec1, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify numeric (integer) search
+ SearchSpecProto search_spec2;
+ search_spec2.set_query("indexableInteger == 123");
+ search_spec2.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto search_result_proto2 =
+ icing.Search(search_spec2, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+
+ // Verify join search: join a query for `name:person` with a child query for
+ // `body:message` based on the child's `senderQualifiedId` field.
+ SearchSpecProto search_spec3;
+ search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec3.set_query("name:person");
+ JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("senderQualifiedId");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("body:message");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ // Person 1 with message
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = person1;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
+ // Person 2 without children
+ *expected_join_search_result_proto.mutable_results()
+ ->Add()
+ ->mutable_document() = person2;
+
+ SearchResultProto search_result_proto3 = icing.Search(
+ search_spec3, ScoringSpecProto::default_instance(), result_spec3);
+ EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ IcingSearchEngineInitializationVersionChangeTest,
+ IcingSearchEngineInitializationVersionChangeTest,
+ testing::Values(
+ // Manually change existing data set's version to kVersion + 1. When
+ // initializing, it will detect "rollback".
+ version_util::VersionInfo(
+ /*version_in=*/version_util::kVersion + 1,
+ /*max_version_in=*/version_util::kVersion + 1),
+
+ // Manually change existing data set's version to kVersion - 1 and
+ // max_version to kVersion - 1. When initializing, it will detect
+ // "upgrade".
+ version_util::VersionInfo(
+ /*version_in=*/version_util::kVersion - 1,
+ /*max_version_in=*/version_util::kVersion - 1),
+
+ // Manually change existing data set's version to kVersion - 1 and
+ // max_version to kVersion. When initializing, it will detect "roll
+ // forward".
+ version_util::VersionInfo(
+ /*version_in=*/version_util::kVersion - 1,
+ /*max_version_in=*/version_util::kVersion),
+
+ // Manually change existing data set's version to 0 and max_version to
+ // 0. When initializing, it will detect "version 0 upgrade".
+ //
+ // Note: in reality, version 0 won't be written into the version file,
+ // but that is OK here since this is a hack to simulate the version 0
+ // situation.
+ version_util::VersionInfo(
+ /*version_in=*/0,
+ /*max_version_in=*/0),
+
+ // Manually change existing data set's version to 0 and max_version to
+ // kVersion. When initializing, it will detect "version 0 roll forward".
+ //
+ // Note: in reality, version 0 won't be written into the version file,
+ // but that is OK here since this is a hack to simulate the version 0
+ // situation.
+ version_util::VersionInfo(
+ /*version_in=*/0,
+ /*max_version_in=*/version_util::kVersion)));
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/icing-search-engine_optimize_test.cc b/icing/icing-search-engine_optimize_test.cc
index 48fae13..3127171 100644
--- a/icing/icing-search-engine_optimize_test.cc
+++ b/icing/icing-search-engine_optimize_test.cc
@@ -897,7 +897,6 @@ TEST_F(IcingSearchEngineOptimizeTest,
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec.set_query("name:person");
JoinSpecProto* join_spec = search_spec.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -911,6 +910,10 @@ TEST_F(IcingSearchEngineOptimizeTest,
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
// Person1 is going to be deleted below. Only person2 which is joined with
// message3 should match the query.
SearchResultProto expected_search_result_proto;
@@ -935,8 +938,7 @@ TEST_F(IcingSearchEngineOptimizeTest,
// Validates that join search query works right after Optimize()
SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
} // Destroys IcingSearchEngine to make sure nothing is cached.
@@ -945,8 +947,7 @@ TEST_F(IcingSearchEngineOptimizeTest,
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
}
@@ -1020,7 +1021,6 @@ TEST_F(IcingSearchEngineOptimizeTest,
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec.set_query("name:person");
JoinSpecProto* join_spec = search_spec.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -1034,6 +1034,10 @@ TEST_F(IcingSearchEngineOptimizeTest,
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
// Message1 and message3 are going to be deleted below. Both person1 and
// person2 should be included even though person2 has no child (since we're
// doing left join).
@@ -1064,8 +1068,7 @@ TEST_F(IcingSearchEngineOptimizeTest,
// Validates that join search query works right after Optimize()
SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
} // Destroys IcingSearchEngine to make sure nothing is cached.
@@ -1074,8 +1077,7 @@ TEST_F(IcingSearchEngineOptimizeTest,
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
}
@@ -1207,7 +1209,6 @@ TEST_F(IcingSearchEngineOptimizeTest,
search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec3.set_query("name:person");
JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -1221,6 +1222,10 @@ TEST_F(IcingSearchEngineOptimizeTest,
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+ ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
+ result_spec3.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
SearchResultProto expected_join_search_result_proto;
expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
SearchResultProto::ResultProto* result_proto =
@@ -1230,8 +1235,7 @@ TEST_F(IcingSearchEngineOptimizeTest,
*result_proto->mutable_joined_results()->Add()->mutable_document() = message1;
SearchResultProto search_result_proto3 =
- icing.Search(search_spec3, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ icing.Search(search_spec3, GetDefaultScoringSpec(), result_spec3);
EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
expected_join_search_result_proto));
}
diff --git a/icing/icing-search-engine_schema_test.cc b/icing/icing-search-engine_schema_test.cc
index 7081ba2..2609cce 100644
--- a/icing/icing-search-engine_schema_test.cc
+++ b/icing/icing-search-engine_schema_test.cc
@@ -1157,6 +1157,10 @@ TEST_F(IcingSearchEngineSchemaTest,
EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
// Verify join search: join a query for `name:person` with a child query for
// `subject:message` based on the child's `receiverQualifiedId` field.
// Since "receiverQualifiedId" is not JOINABLE_VALUE_TYPE_QUALIFIED_ID,
@@ -1166,7 +1170,6 @@ TEST_F(IcingSearchEngineSchemaTest,
search_spec_join_by_receiver.set_query("name:person");
search_spec_join_by_receiver.set_term_match_type(TermMatchType::EXACT_ONLY);
JoinSpecProto* join_spec = search_spec_join_by_receiver.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("receiverQualifiedId");
@@ -1189,9 +1192,8 @@ TEST_F(IcingSearchEngineSchemaTest,
*expected_empty_child_search_result_proto.mutable_results()
->Add()
->mutable_document() = person1;
- SearchResultProto actual_results =
- icing.Search(search_spec_join_by_receiver, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ SearchResultProto actual_results = icing.Search(
+ search_spec_join_by_receiver, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_empty_child_search_result_proto));
@@ -1214,9 +1216,8 @@ TEST_F(IcingSearchEngineSchemaTest,
*expected_join_by_sender_search_result_proto.mutable_results()
->Add()
->mutable_document() = person1;
- actual_results =
- icing.Search(search_spec_join_by_sender, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ actual_results = icing.Search(search_spec_join_by_sender,
+ GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_join_by_sender_search_result_proto));
@@ -1259,9 +1260,8 @@ TEST_F(IcingSearchEngineSchemaTest,
*expected_join_by_receiver_search_result_proto.mutable_results()
->Add()
->mutable_document() = person2;
- actual_results =
- icing.Search(search_spec_join_by_receiver, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ actual_results = icing.Search(search_spec_join_by_receiver,
+ GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results,
EqualsSearchResultIgnoreStatsAndScores(
expected_join_by_receiver_search_result_proto));
@@ -1269,13 +1269,98 @@ TEST_F(IcingSearchEngineSchemaTest,
// Verify join search: join a query for `name:person` with a child query for
// `subject:message` based on the child's `senderQualifiedId` field. We should
// get the same set of result since `senderQualifiedId` is unchanged.
- actual_results =
- icing.Search(search_spec_join_by_sender, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ actual_results = icing.Search(search_spec_join_by_sender,
+ GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_join_by_sender_search_result_proto));
}
+// With allow_circular_schema_definitions turned off, SetSchema is expected to
+// reject a schema in which types A and B reference each other and to report
+// INVALID_ARGUMENT.
+TEST_F(IcingSearchEngineSchemaTest,
+       SetSchemaWithValidCycle_circularSchemaDefinitionNotAllowedFails) {
+  // Two types pointing at each other: A.b -> B (nested properties indexed)
+  // and B.a -> A (nested properties not indexed).
+  SchemaProto cyclic_schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("b")
+                  .SetCardinality(CARDINALITY_OPTIONAL)
+                  .SetDataTypeDocument("B", /*index_nested_properties=*/true)))
+          .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("a")
+                  .SetCardinality(CARDINALITY_OPTIONAL)
+                  .SetDataTypeDocument("A", /*index_nested_properties=*/false)))
+          .Build();
+
+  // Bring up an engine that disallows circular schema definitions.
+  IcingSearchEngineOptions engine_options = GetDefaultIcingOptions();
+  engine_options.set_allow_circular_schema_definitions(false);
+  IcingSearchEngine icing(engine_options, GetTestJniCache());
+  const auto init_result = icing.Initialize();
+  ASSERT_THAT(init_result.status(), ProtoIsOk());
+
+  const auto set_schema_result = icing.SetSchema(
+      cyclic_schema, /*ignore_errors_and_delete_documents=*/false);
+  EXPECT_THAT(set_schema_result.status(),
+              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+// With allow_circular_schema_definitions turned on, SetSchema is expected to
+// accept an A <-> B cycle in which one edge (B -> A) does not index nested
+// properties.
+TEST_F(IcingSearchEngineSchemaTest,
+       SetSchemaWithValidCycle_allowCircularSchemaDefinitionsOK) {
+  // A.b -> B sets index_nested_properties=true; B.a -> A sets it to false.
+  SchemaProto cyclic_schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("b")
+                  .SetCardinality(CARDINALITY_OPTIONAL)
+                  .SetDataTypeDocument("B", /*index_nested_properties=*/true)))
+          .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("a")
+                  .SetCardinality(CARDINALITY_OPTIONAL)
+                  .SetDataTypeDocument("A", /*index_nested_properties=*/false)))
+          .Build();
+
+  // Bring up an engine that allows circular schema definitions.
+  IcingSearchEngineOptions engine_options = GetDefaultIcingOptions();
+  engine_options.set_allow_circular_schema_definitions(true);
+  IcingSearchEngine icing(engine_options, GetTestJniCache());
+  const auto init_result = icing.Initialize();
+  ASSERT_THAT(init_result.status(), ProtoIsOk());
+
+  const auto set_schema_result = icing.SetSchema(
+      cyclic_schema, /*ignore_errors_and_delete_documents=*/false);
+  EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK));
+}
+
+// Even with allow_circular_schema_definitions turned on, SetSchema is expected
+// to reject an A <-> B cycle in which every edge indexes nested properties,
+// reporting INVALID_ARGUMENT.
+TEST_F(IcingSearchEngineSchemaTest,
+       SetSchemaWithInvalidCycle_allowCircularSchemaDefinitionsFails) {
+  // Both A.b -> B and B.a -> A set index_nested_properties=true.
+  SchemaProto cyclic_schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("b")
+                  .SetCardinality(CARDINALITY_OPTIONAL)
+                  .SetDataTypeDocument("B", /*index_nested_properties=*/true)))
+          .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("a")
+                  .SetCardinality(CARDINALITY_OPTIONAL)
+                  .SetDataTypeDocument("A", /*index_nested_properties=*/true)))
+          .Build();
+
+  // Bring up an engine that allows circular schema definitions.
+  IcingSearchEngineOptions engine_options = GetDefaultIcingOptions();
+  engine_options.set_allow_circular_schema_definitions(true);
+  IcingSearchEngine icing(engine_options, GetTestJniCache());
+  const auto init_result = icing.Initialize();
+  ASSERT_THAT(init_result.status(), ProtoIsOk());
+
+  const auto set_schema_result = icing.SetSchema(
+      cyclic_schema, /*ignore_errors_and_delete_documents=*/false);
+  EXPECT_THAT(set_schema_result.status(),
+              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
TEST_F(
IcingSearchEngineSchemaTest,
ForceSetSchemaIndexedPropertyDeletionTriggersIndexRestorationAndReturnsOk) {
@@ -1500,7 +1585,6 @@ TEST_F(IcingSearchEngineSchemaTest,
search_spec.set_query("name:person");
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
JoinSpecProto* join_spec = search_spec.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -1514,9 +1598,12 @@ TEST_F(IcingSearchEngineSchemaTest,
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
@@ -1568,8 +1655,8 @@ TEST_F(IcingSearchEngineSchemaTest,
// Verify join search: join a query for `name:person` with a child query for
// `subject:tps` based on the child's `senderQualifiedId` field. We should
// still be able to join person and email documents by this property.
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
}
@@ -1800,7 +1887,6 @@ TEST_F(
search_spec.set_query("name:person");
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
JoinSpecProto* join_spec = search_spec.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("senderQualifiedId");
@@ -1814,9 +1900,12 @@ TEST_F(
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
+
SearchResultProto actual_results =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
@@ -1870,8 +1959,8 @@ TEST_F(
// Verify join search: join a query for `name:person` with a child query for
// `subject:tps` based on the child's `senderQualifiedId` field. We should
// still be able to join person and email documents by this property.
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
}
diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc
index 63fb657..cada6c7 100644
--- a/icing/icing-search-engine_search_test.cc
+++ b/icing/icing-search-engine_search_test.cc
@@ -1118,6 +1118,99 @@ TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedByDocumentScore) {
expected_search_result_proto));
}
+// Checks searching through a nested document whose actual type ("Artist") is
+// a subtype of the declared property type ("Person"): terms from the "name"
+// property are expected to match the outer document, while a term from the
+// subtype-only "emailAddress" property is expected to match nothing.
+TEST_P(IcingSearchEngineSearchTest, SearchWorksForNestedSubtypeDocument) {
+  // Artist has Person as parent type and adds "emailAddress"; Company holds
+  // repeated "employee" documents declared as Person.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("name")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_OPTIONAL)))
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("Artist")
+                       .AddParentType("Person")
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("name")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("emailAddress")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL)))
+          .AddType(SchemaTypeConfigBuilder().SetType("Company").AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("employee")
+                  .SetDataTypeDocument("Person",
+                                       /*index_nested_properties=*/true)
+                  .SetCardinality(CARDINALITY_REPEATED)))
+          .Build();
+
+  // The two employees nested in the company: a plain Person and an Artist.
+  DocumentProto person_employee =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Person")
+          .AddStringProperty("name", "name_person")
+          .Build();
+  DocumentProto artist_employee =
+      DocumentBuilder()
+          .SetKey("namespace", "uri2")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Artist")
+          .AddStringProperty("name", "name_artist")
+          .AddStringProperty("emailAddress", "email")
+          .Build();
+  DocumentProto document_company =
+      DocumentBuilder()
+          .SetKey("namespace", "uri")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Company")
+          .AddDocumentProperty("employee", person_employee, artist_employee)
+          .Build();
+
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document_company).status(), ProtoIsOk());
+
+  // Expected outcome when the company is the single hit.
+  SearchResultProto expected_company_hit;
+  expected_company_hit.mutable_status()->set_code(StatusProto::OK);
+  *expected_company_hit.add_results()->mutable_document() = document_company;
+
+  // Expected outcome when nothing matches.
+  SearchResultProto expected_no_hit;
+  expected_no_hit.mutable_status()->set_code(StatusProto::OK);
+
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_search_type(GetParam());
+
+  // Runs `query` with the default scoring and result specs.
+  auto run_query = [&](const char* query) {
+    search_spec.set_query(query);
+    return icing.Search(search_spec, GetDefaultScoringSpec(),
+                        ResultSpecProto::default_instance());
+  };
+
+  // "name_person" should match the company.
+  EXPECT_THAT(run_query("name_person"),
+              EqualsSearchResultIgnoreStatsAndScores(expected_company_hit));
+
+  // "name_artist" should match the company.
+  EXPECT_THAT(run_query("name_artist"),
+              EqualsSearchResultIgnoreStatsAndScores(expected_company_hit));
+
+  // "email" should not match the company even though the artist has a matched
+  // property. This is because the "employee" property is defined as Person
+  // type, and indexing on document properties should be based on defined types,
+  // instead of subtypes.
+  EXPECT_THAT(run_query("email"),
+              EqualsSearchResultIgnoreStatsAndScores(expected_no_hit));
+}
+
TEST_P(IcingSearchEngineSearchTest, SearchShouldAllowNoScoring) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -3472,7 +3565,6 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
// JoinSpec
JoinSpecProto* join_spec = search_spec.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("personQualifiedId");
@@ -3496,6 +3588,8 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
// Parent ResultSpec
ResultSpecProto result_spec;
result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
// Since we:
// - Use MAX for aggregation scoring strategy.
@@ -4050,7 +4144,6 @@ TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedId) {
// JoinSpec
JoinSpecProto* join_spec = search_spec.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("personQualifiedId");
@@ -4071,6 +4164,8 @@ TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedId) {
// Parent ResultSpec
ResultSpecProto result_spec;
result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
// Since we:
// - Use MAX for aggregation scoring strategy.
@@ -4123,6 +4218,346 @@ TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedId) {
EqualsSearchResultIgnoreStatsAndScores(expected_result3));
}
+// Verifies that a join query attaches at most
+// max_joined_children_per_parent_to_return child documents to each parent
+// result, while the COUNT aggregation score of the parent still reflects every
+// joined child.
+TEST_P(IcingSearchEngineSearchTest,
+       JoinShouldLimitNumChildDocumentsByMaxJoinedChildPerParent) {
+  // Person is the parent type; Email is the child type and joins to a Person
+  // through its "personQualifiedId" qualified-id property.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("Person")
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("firstName")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("lastName")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("emailAddress")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL)))
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("Email")
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("subject")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("personQualifiedId")
+                                        .SetDataTypeJoinableString(
+                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+                                        .SetCardinality(CARDINALITY_OPTIONAL)))
+          .Build();
+
+  DocumentProto person1 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "person1")
+          .SetSchema("Person")
+          .AddStringProperty("firstName", "first1")
+          .AddStringProperty("lastName", "last1")
+          .AddStringProperty("emailAddress", "email1@gmail.com")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .SetScore(1)
+          .Build();
+  DocumentProto person2 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "person2")
+          .SetSchema("Person")
+          .AddStringProperty("firstName", "first2")
+          .AddStringProperty("lastName", "last2")
+          .AddStringProperty("emailAddress", "email2@gmail.com")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .SetScore(2)
+          .Build();
+
+  // email1 refers to person1; email2, email3 and email4 refer to person2.
+  DocumentProto email1 =
+      DocumentBuilder()
+          .SetKey("namespace", "email1")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 1")
+          .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .SetScore(100)
+          .Build();
+  DocumentProto email2 =
+      DocumentBuilder()
+          .SetKey("namespace", "email2")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 2")
+          .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .SetScore(99)
+          .Build();
+  DocumentProto email3 =
+      DocumentBuilder()
+          .SetKey("namespace", "email3")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 3")
+          .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .SetScore(98)
+          .Build();
+  DocumentProto email4 =
+      DocumentBuilder()
+          .SetKey("namespace", "email4")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 4")
+          .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .SetScore(97)
+          .Build();
+
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(email4).status(), ProtoIsOk());
+
+  // Parent SearchSpec
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("firstName:first");
+  search_spec.set_search_type(GetParam());
+
+  // JoinSpec
+  JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+  join_spec->set_parent_property_expression(
+      std::string(JoinProcessor::kQualifiedIdExpr));
+  join_spec->set_child_property_expression("personQualifiedId");
+  join_spec->set_aggregation_scoring_strategy(
+      JoinSpecProto::AggregationScoringStrategy::COUNT);
+  JoinSpecProto::NestedSpecProto* nested_spec =
+      join_spec->mutable_nested_spec();
+  SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+  nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+  nested_search_spec->set_query("subject:test");
+  nested_search_spec->set_search_type(GetParam());
+  *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+  *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+  // Parent ScoringSpec
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+  // Parent ResultSpec with max_joined_children_per_parent_to_return = 2
+  ResultSpecProto result_spec;
+  result_spec.set_num_per_page(1);
+  result_spec.set_max_joined_children_per_parent_to_return(2);
+
+  // - Use COUNT for aggregation scoring strategy.
+  // - max_joined_children_per_parent_to_return = 2.
+  // - (Default) use DESC as the ranking order.
+  //
+  // person2 should have the highest aggregated score (3) since email2, email3,
+  // email4 are joined to it and the COUNT aggregated score is 3. However, only
+  // email2 and email3 should be attached to person2 due to
+  // max_joined_children_per_parent_to_return limitation in result_spec.
+  // person1 should be the second (aggregated score = 1).
+  SearchResultProto::ResultProto expected_result_proto1;
+  *expected_result_proto1.mutable_document() = person2;
+  expected_result_proto1.set_score(3);
+  SearchResultProto::ResultProto* child_result_proto1 =
+      expected_result_proto1.mutable_joined_results()->Add();
+  *child_result_proto1->mutable_document() = email2;
+  child_result_proto1->set_score(99);
+  SearchResultProto::ResultProto* child_result_proto2 =
+      expected_result_proto1.mutable_joined_results()->Add();
+  *child_result_proto2->mutable_document() = email3;
+  child_result_proto2->set_score(98);
+
+  SearchResultProto::ResultProto expected_result_proto2;
+  *expected_result_proto2.mutable_document() = person1;
+  expected_result_proto2.set_score(1);
+  SearchResultProto::ResultProto* child_result_proto3 =
+      expected_result_proto2.mutable_joined_results()->Add();
+  *child_result_proto3->mutable_document() = email1;
+  child_result_proto3->set_score(100);
+
+  // With num_per_page = 1, the first page carries person2 and a valid token
+  // for the next page.
+  SearchResultProto result1 =
+      icing.Search(search_spec, scoring_spec, result_spec);
+  uint64_t next_page_token = result1.next_page_token();
+  EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+  EXPECT_THAT(result1.results(),
+              ElementsAre(EqualsProto(expected_result_proto1)));
+
+  // The second page carries person1 and is the last page.
+  SearchResultProto result2 = icing.GetNextPage(next_page_token);
+  next_page_token = result2.next_page_token();
+  EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+  EXPECT_THAT(result2.results(),
+              ElementsAre(EqualsProto(expected_result_proto2)));
+}
+
+// Verifies that a join query with max_joined_children_per_parent_to_return set
+// to 0 returns parent results with no child documents attached, while the
+// COUNT aggregation score of each parent still reflects its joined children.
+TEST_P(IcingSearchEngineSearchTest, JoinWithZeroMaxJoinedChildPerParent) {
+  // Person is the parent type; Email is the child type and joins to a Person
+  // through its "personQualifiedId" qualified-id property.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("Person")
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("firstName")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("lastName")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("emailAddress")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL)))
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("Email")
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("subject")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("personQualifiedId")
+                                        .SetDataTypeJoinableString(
+                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+                                        .SetCardinality(CARDINALITY_OPTIONAL)))
+          .Build();
+
+  DocumentProto person1 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "person1")
+          .SetSchema("Person")
+          .AddStringProperty("firstName", "first1")
+          .AddStringProperty("lastName", "last1")
+          .AddStringProperty("emailAddress", "email1@gmail.com")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .SetScore(1)
+          .Build();
+  DocumentProto person2 =
+      DocumentBuilder()
+          .SetKey("pkg$db/namespace", "person2")
+          .SetSchema("Person")
+          .AddStringProperty("firstName", "first2")
+          .AddStringProperty("lastName", "last2")
+          .AddStringProperty("emailAddress", "email2@gmail.com")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .SetScore(2)
+          .Build();
+
+  // email1 refers to person1; email2, email3 and email4 refer to person2.
+  DocumentProto email1 =
+      DocumentBuilder()
+          .SetKey("namespace", "email1")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 1")
+          .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .SetScore(100)
+          .Build();
+  DocumentProto email2 =
+      DocumentBuilder()
+          .SetKey("namespace", "email2")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 2")
+          .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .SetScore(99)
+          .Build();
+  DocumentProto email3 =
+      DocumentBuilder()
+          .SetKey("namespace", "email3")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 3")
+          .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .SetScore(98)
+          .Build();
+  DocumentProto email4 =
+      DocumentBuilder()
+          .SetKey("namespace", "email4")
+          .SetSchema("Email")
+          .AddStringProperty("subject", "test subject 4")
+          .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .SetScore(97)
+          .Build();
+
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(email4).status(), ProtoIsOk());
+
+  // Parent SearchSpec
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  search_spec.set_query("firstName:first");
+  search_spec.set_search_type(GetParam());
+
+  // JoinSpec
+  JoinSpecProto* join_spec = search_spec.mutable_join_spec();
+  join_spec->set_parent_property_expression(
+      std::string(JoinProcessor::kQualifiedIdExpr));
+  join_spec->set_child_property_expression("personQualifiedId");
+  join_spec->set_aggregation_scoring_strategy(
+      JoinSpecProto::AggregationScoringStrategy::COUNT);
+  JoinSpecProto::NestedSpecProto* nested_spec =
+      join_spec->mutable_nested_spec();
+  SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+  nested_search_spec->set_term_match_type(TermMatchType::PREFIX);
+  nested_search_spec->set_query("subject:test");
+  nested_search_spec->set_search_type(GetParam());
+  *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+  *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
+
+  // Parent ScoringSpec
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+
+  // Parent ResultSpec with max_joined_children_per_parent_to_return = 0
+  ResultSpecProto result_spec;
+  result_spec.set_num_per_page(1);
+  result_spec.set_max_joined_children_per_parent_to_return(0);
+
+  // - Use COUNT for aggregation scoring strategy.
+  // - max_joined_children_per_parent_to_return = 0.
+  // - (Default) use DESC as the ranking order.
+  //
+  // person2 should have the highest aggregated score (3) since email2, email3,
+  // email4 are joined to it and the COUNT aggregated score is 3. However, no
+  // child documents should be attached to person2 due to
+  // max_joined_children_per_parent_to_return limitation in result_spec.
+  // person1 should be the second (aggregated score = 1) with no attached child
+  // documents.
+  SearchResultProto::ResultProto expected_result_proto1;
+  *expected_result_proto1.mutable_document() = person2;
+  expected_result_proto1.set_score(3);
+
+  SearchResultProto::ResultProto expected_result_proto2;
+  *expected_result_proto2.mutable_document() = person1;
+  expected_result_proto2.set_score(1);
+
+  // With num_per_page = 1, the first page carries person2 and a valid token
+  // for the next page.
+  SearchResultProto result1 =
+      icing.Search(search_spec, scoring_spec, result_spec);
+  uint64_t next_page_token = result1.next_page_token();
+  EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken));
+  EXPECT_THAT(result1.results(),
+              ElementsAre(EqualsProto(expected_result_proto1)));
+
+  // The second page carries person1 and is the last page.
+  SearchResultProto result2 = icing.GetNextPage(next_page_token);
+  next_page_token = result2.next_page_token();
+  EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken));
+  EXPECT_THAT(result2.results(),
+              ElementsAre(EqualsProto(expected_result_proto2)));
+}
+
TEST_P(IcingSearchEngineSearchTest, JoinSnippet) {
SchemaProto schema =
SchemaBuilder()
@@ -4192,7 +4627,6 @@ TEST_P(IcingSearchEngineSearchTest, JoinSnippet) {
// JoinSpec
JoinSpecProto* join_spec = search_spec.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("personQualifiedId");
@@ -4217,6 +4651,8 @@ TEST_P(IcingSearchEngineSearchTest, JoinSnippet) {
// Parent ResultSpec (without snippet)
ResultSpecProto result_spec;
result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
SearchResultProto result =
icing.Search(search_spec, scoring_spec, result_spec);
@@ -4315,7 +4751,6 @@ TEST_P(IcingSearchEngineSearchTest, JoinProjection) {
// JoinSpec
JoinSpecProto* join_spec = search_spec.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("personQualifiedId");
@@ -4341,6 +4776,8 @@ TEST_P(IcingSearchEngineSearchTest, JoinProjection) {
// Parent ResultSpec (with projection)
ResultSpecProto result_spec;
result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
type_property_mask = result_spec.add_type_property_masks();
type_property_mask->set_schema_type("Person");
type_property_mask->add_paths("emailAddress");
@@ -4518,7 +4955,6 @@ TEST_F(IcingSearchEngineSearchTest, JoinWithAdvancedScoring) {
// JoinSpec
JoinSpecProto* join_spec = search_spec.mutable_join_spec();
- join_spec->set_max_joined_child_count(100);
join_spec->set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec->set_child_property_expression("personQualifiedId");
@@ -4533,6 +4969,8 @@ TEST_F(IcingSearchEngineSearchTest, JoinWithAdvancedScoring) {
// Parent ResultSpec
ResultSpecProto result_spec;
result_spec.set_num_per_page(1);
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
SearchResultProto results =
icing.Search(search_spec, parent_scoring_spec, result_spec);
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index 1340ebb..ddb83a8 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -428,6 +428,408 @@ TEST_F(IcingSearchEngineTest,
EqualsProto(expected_get_result_proto));
}
+// Checks that Get() applies a parent type's TypePropertyMask to documents of a
+// child type: "Artist" declares "Person" as its parent, so the "Person" mask
+// is expected to be merged with the "Artist" mask.
+TEST_F(IcingSearchEngineTest, GetDocumentProjectionPolymorphism) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("company")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // Add a person document and an artist document
+ DocumentProto document_person =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build();
+ DocumentProto document_artist =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Meg Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .AddStringProperty("company", "aol")
+ .Build();
+ ASSERT_THAT(icing.Put(document_person).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_artist).status(), ProtoIsOk());
+
+ // Add type property masks
+ GetResultSpecProto result_spec;
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+ // Since Artist is a child type of Person, the TypePropertyMask for Person
+ // will be merged to Artist's TypePropertyMask by polymorphism, so that 'name'
+ // will also show in Artist's projection results.
+ TypePropertyMask* artist_type_property_mask =
+ result_spec.add_type_property_masks();
+ artist_type_property_mask->set_schema_type("Artist");
+ artist_type_property_mask->add_paths("emailAddress");
+
+ // Verify that the returned person document only contains the 'name' property,
+ // and the returned artist document contains both the 'name' and
+ // 'emailAddress' properties ('company' is in neither mask).
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
+ EqualsProto(expected_get_result_proto));
+
+ // The expectation proto is reused; only its document is replaced.
+ *expected_get_result_proto.mutable_document() =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Meg Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri2", result_spec),
+ EqualsProto(expected_get_result_proto));
+}
+
+// Checks Get() projection for a type with two parents: "EmailMessage" declares
+// both "Email" and "Message" as parents, and masks are supplied only for the
+// two parent types.
+TEST_F(IcingSearchEngineTest, GetDocumentProjectionMultipleParentPolymorphism) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("note")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("EmailMessage")
+ .AddParentType("Email")
+ .AddParentType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("note")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // Add an email document and a message document
+ DocumentProto document_email =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("sender", "sender1")
+ .AddStringProperty("recipient", "recipient1")
+ .Build();
+ DocumentProto document_message = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("content", "content1")
+ .AddStringProperty("note", "note1")
+ .Build();
+ // Add an emailMessage document
+ DocumentProto document_email_message =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("EmailMessage")
+ .AddStringProperty("sender", "sender2")
+ .AddStringProperty("recipient", "recipient2")
+ .AddStringProperty("content", "content2")
+ .AddStringProperty("note", "note2")
+ .Build();
+
+ ASSERT_THAT(icing.Put(document_email).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_message).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_email_message).status(), ProtoIsOk());
+
+ // Add type property masks for Email and Message, and both of them will apply
+ // to EmailMessage.
+ GetResultSpecProto result_spec;
+ TypePropertyMask* email_type_property_mask =
+ result_spec.add_type_property_masks();
+ email_type_property_mask->set_schema_type("Email");
+ email_type_property_mask->add_paths("sender");
+
+ TypePropertyMask* message_type_property_mask =
+ result_spec.add_type_property_masks();
+ message_type_property_mask->set_schema_type("Message");
+ message_type_property_mask->add_paths("content");
+
+ // Verify that
+ // - The returned email document only contains the 'sender' property.
+ // - The returned message document only contains the 'content' property.
+ // - The returned email message document contains both the 'sender' and
+ // 'content' properties.
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("sender", "sender1")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
+ EqualsProto(expected_get_result_proto));
+
+ // The expectation proto is reused; only its document is replaced.
+ *expected_get_result_proto.mutable_document() =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("content", "content1")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri2", result_spec),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("EmailMessage")
+ .AddStringProperty("sender", "sender2")
+ .AddStringProperty("content", "content2")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri3", result_spec),
+ EqualsProto(expected_get_result_proto));
+}
+
+// Checks Get() projection across a diamond-shaped inheritance graph: a single
+// mask on the root type "Object" is expected to restrict the documents of
+// "Email", "Message" and "EmailMessage" to the 'objectId' property.
+TEST_F(IcingSearchEngineTest, GetDocumentProjectionDiamondPolymorphism) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Create a schema with a diamond inheritance relation.
+ // Object
+ // / \
+ // Email Message
+ // \ /
+ // EmailMessage
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Object").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("objectId")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddParentType("Object")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("objectId")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddParentType("Object")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("objectId")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("note")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("EmailMessage")
+ .AddParentType("Email")
+ .AddParentType("Message")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("objectId")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("recipient")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("content")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("note")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // Add an email document and a message document
+ DocumentProto document_email =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("objectId", "object1")
+ .AddStringProperty("sender", "sender1")
+ .AddStringProperty("recipient", "recipient1")
+ .Build();
+ DocumentProto document_message = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("objectId", "object2")
+ .AddStringProperty("content", "content1")
+ .AddStringProperty("note", "note1")
+ .Build();
+ // Add an emailMessage document
+ DocumentProto document_email_message =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("EmailMessage")
+ .AddStringProperty("objectId", "object3")
+ .AddStringProperty("sender", "sender2")
+ .AddStringProperty("recipient", "recipient2")
+ .AddStringProperty("content", "content2")
+ .AddStringProperty("note", "note2")
+ .Build();
+
+ ASSERT_THAT(icing.Put(document_email).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_message).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document_email_message).status(), ProtoIsOk());
+
+ // Add type property masks for Object, which should apply to Email, Message
+ // and EmailMessage.
+ GetResultSpecProto result_spec;
+ TypePropertyMask* object_type_property_mask =
+ result_spec.add_type_property_masks();
+ object_type_property_mask->set_schema_type("Object");
+ object_type_property_mask->add_paths("objectId");
+
+ // Verify that all the documents only contain the 'objectId' property.
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("objectId", "object1")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
+ EqualsProto(expected_get_result_proto));
+
+ // The expectation proto is reused; only its document is replaced.
+ *expected_get_result_proto.mutable_document() =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("objectId", "object2")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri2", result_spec),
+ EqualsProto(expected_get_result_proto));
+
+ *expected_get_result_proto.mutable_document() =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("EmailMessage")
+ .AddStringProperty("objectId", "object3")
+ .Build();
+ ASSERT_THAT(icing.Get("namespace", "uri3", result_spec),
+ EqualsProto(expected_get_result_proto));
+}
+
TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index ee43364..1cbe00d 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -172,7 +172,9 @@ std::unique_ptr<SchemaStore> CreateSchemaStore(const Filesystem& filesystem,
SchemaProto schema;
CreateFakeTypeConfig(schema.add_types());
- auto set_schema_status = schema_store->SetSchema(schema);
+ auto set_schema_status = schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false);
if (!set_schema_status.ok()) {
ICING_LOG(ERROR) << set_schema_status.status().error_message();
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index 9453e58..ed9e856 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -40,7 +40,7 @@
#include "icing/index/numeric/numeric-index.h"
#include "icing/index/string-section-indexing-handler.h"
#include "icing/index/term-property-id.h"
-#include "icing/join/qualified-id-joinable-property-indexing-handler.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/join/qualified-id-type-joinable-index.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
@@ -267,7 +267,9 @@ class IndexProcessorTest : public Test {
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(
@@ -291,10 +293,10 @@ class IndexProcessorTest : public Test {
IntegerSectionIndexingHandler::Create(
&fake_clock_, integer_index_.get()));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
qualified_id_joinable_property_indexing_handler,
- QualifiedIdJoinablePropertyIndexingHandler::Create(
- &fake_clock_, qualified_id_join_index_.get()));
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
handlers.push_back(std::move(string_section_indexing_handler));
handlers.push_back(std::move(integer_section_indexing_handler));
@@ -823,10 +825,10 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) {
IntegerSectionIndexingHandler::Create(
&fake_clock_, integer_index_.get()));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>
qualified_id_joinable_property_indexing_handler,
- QualifiedIdJoinablePropertyIndexingHandler::Create(
- &fake_clock_, qualified_id_join_index_.get()));
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
handlers.push_back(std::move(string_section_indexing_handler));
handlers.push_back(std::move(integer_section_indexing_handler));
diff --git a/icing/index/index.cc b/icing/index/index.cc
index 5cfcd27..19edbb6 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -163,6 +163,12 @@ libtextclassifier3::StatusOr<std::unique_ptr<Index>> Index::Create(
std::move(main_index), filesystem));
}
+// Resolves the main index file path under |base_dir| and forwards the magic
+// lookup to MainIndex, returning whatever MainIndex reports.
+/* static */ libtextclassifier3::StatusOr<int> Index::ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& base_dir) {
+ const std::string main_index_filepath = MakeMainIndexFilepath(base_dir);
+ return MainIndex::ReadFlashIndexMagic(filesystem, main_index_filepath);
+}
+
libtextclassifier3::Status Index::TruncateTo(DocumentId document_id) {
if (lite_index_->last_added_document_id() != kInvalidDocumentId &&
lite_index_->last_added_document_id() > document_id) {
diff --git a/icing/index/index.h b/icing/index/index.h
index 3200d70..c170278 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -86,6 +86,16 @@ class Index {
const Options& options, const Filesystem* filesystem,
const IcingFilesystem* icing_filesystem);
+ // Reads the magic from the header of an existing flash (main) index file
+ // located under base_dir. We need this during the Icing initialization phase
+ // to determine the version.
+ //
+ // Returns:
+ // Valid magic on success
+ // NOT_FOUND if the flash (main) index file doesn't exist
+ // INTERNAL on I/O error
+ static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& base_dir);
+
// Clears all files created by the index. Returns OK if all files were
// cleared.
libtextclassifier3::Status Reset() {
diff --git a/icing/index/integer-section-indexing-handler.cc b/icing/index/integer-section-indexing-handler.cc
index 584f028..63b09df 100644
--- a/icing/index/integer-section-indexing-handler.cc
+++ b/icing/index/integer-section-indexing-handler.cc
@@ -16,12 +16,19 @@
#include <cstdint>
#include <memory>
+#include <utility>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/logging.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
namespace icing {
@@ -41,7 +48,7 @@ IntegerSectionIndexingHandler::Create(const Clock* clock,
libtextclassifier3::Status IntegerSectionIndexingHandler::Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
- // TODO(b/259744228): set integer indexing latency and other stats
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
if (!IsDocumentIdValid(document_id)) {
return absl_ports::InvalidArgumentError(
@@ -93,6 +100,11 @@ libtextclassifier3::Status IntegerSectionIndexingHandler::Handle(
}
}
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_integer_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
+ }
+
return status;
}
diff --git a/icing/index/integer-section-indexing-handler_test.cc b/icing/index/integer-section-indexing-handler_test.cc
index 895fe57..706856c 100644
--- a/icing/index/integer-section-indexing-handler_test.cc
+++ b/icing/index/integer-section-indexing-handler_test.cc
@@ -156,7 +156,9 @@ class IntegerSectionIndexingHandlerTest : public ::testing::Test {
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ASSERT_TRUE(
filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()));
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc
index 83a73a4..2c0c2c2 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc
@@ -55,11 +55,16 @@ DocHitInfoIteratorFilter::DocHitInfoIteratorFilter(
// Precompute all the SchemaTypeIds
for (std::string_view schema_type : options_.schema_types) {
- auto schema_type_id_or = schema_store_.GetSchemaTypeId(schema_type);
+ libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*>
+ schema_type_ids_or =
+ schema_store_.GetSchemaTypeIdsWithChildren(schema_type);
// If we can't find the SchemaTypeId, just throw it away
- if (schema_type_id_or.ok()) {
- target_schema_type_ids_.emplace(schema_type_id_or.ValueOrDie());
+ if (schema_type_ids_or.ok()) {
+ const std::unordered_set<SchemaTypeId>* schema_type_ids =
+ schema_type_ids_or.ValueOrDie();
+ target_schema_type_ids_.insert(schema_type_ids->begin(),
+ schema_type_ids->end());
}
}
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
index 0900e1f..4b86cae 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
@@ -17,6 +17,7 @@
#include <limits>
#include <memory>
#include <string>
+#include <string_view>
#include <utility>
#include <vector>
@@ -80,7 +81,9 @@ class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -247,7 +250,9 @@ class DocHitInfoIteratorNamespaceFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -379,30 +384,52 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, FilterForMultipleNamespacesOk) {
class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test {
protected:
+ static constexpr std::string_view kSchema1 = "email";
+ static constexpr std::string_view kSchema2 = "message";
+ static constexpr std::string_view kSchema3 = "person";
+ static constexpr std::string_view kSchema4 = "artist";
+ static constexpr std::string_view kSchema5 = "emailMessage";
+
DocHitInfoIteratorSchemaTypeFilterTest()
: test_dir_(GetTestTempDir() + "/icing") {}
void SetUp() override {
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- document1_schema1_ =
- DocumentBuilder().SetKey("namespace", "1").SetSchema(schema1_).Build();
- document2_schema2_ =
- DocumentBuilder().SetKey("namespace", "2").SetSchema(schema2_).Build();
- document3_schema3_ =
- DocumentBuilder().SetKey("namespace", "3").SetSchema(schema3_).Build();
- document4_schema1_ =
- DocumentBuilder().SetKey("namespace", "4").SetSchema(schema1_).Build();
+ document1_schema1_ = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema(std::string(kSchema1))
+ .Build();
+ document2_schema2_ = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema(std::string(kSchema2))
+ .Build();
+ document3_schema3_ = DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema(std::string(kSchema3))
+ .Build();
+ document4_schema1_ = DocumentBuilder()
+ .SetKey("namespace", "4")
+ .SetSchema(std::string(kSchema1))
+ .Build();
SchemaProto schema =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType(schema1_))
- .AddType(SchemaTypeConfigBuilder().SetType(schema2_))
- .AddType(SchemaTypeConfigBuilder().SetType(schema3_))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema1))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema2))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema3))
+ .AddType(SchemaTypeConfigBuilder().SetType(kSchema4).AddParentType(
+ kSchema3))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(std::string(kSchema5))
+ .AddParentType(kSchema1)
+ .AddParentType(kSchema2))
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -424,9 +451,6 @@ class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test {
FakeClock fake_clock_;
const Filesystem filesystem_;
const std::string test_dir_;
- const std::string schema1_ = "email";
- const std::string schema2_ = "message";
- const std::string schema3_ = "person";
DocumentProto document1_schema1_;
DocumentProto document2_schema2_;
DocumentProto document3_schema3_;
@@ -495,7 +519,7 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- options_.schema_types = std::vector<std::string_view>{schema1_};
+ options_.schema_types = std::vector<std::string_view>{kSchema1};
DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
document_store_.get(),
schema_store_.get(), options_);
@@ -518,7 +542,7 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, FilterForMultipleSchemaTypesOk) {
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- options_.schema_types = std::vector<std::string_view>{schema2_, schema3_};
+ options_.schema_types = std::vector<std::string_view>{kSchema2, kSchema3};
DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
document_store_.get(),
schema_store_.get(), options_);
@@ -527,6 +551,110 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, FilterForMultipleSchemaTypesOk) {
ElementsAre(document_id2, document_id3));
}
+// Checks that a schema type filter also matches documents of child types:
+// filtering on "person" returns the "artist" document too, while filtering on
+// "artist" returns only the "artist" document.
+TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
+ FilterForSchemaTypePolymorphismOk) {
+ // Add some irrelevant documents.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document1_schema1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document2_schema2_));
+
+ // Create a person document and an artist document, where the artist should be
+ // able to be interpreted as a person by polymorphism.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId person_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "person")
+ .SetSchema("person")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId artist_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "artist")
+ .SetSchema("artist")
+ .Build()));
+
+ // Every filtered iterator below is fed this same list of hits.
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(document_id1), DocHitInfo(document_id2),
+ DocHitInfo(person_document_id), DocHitInfo(artist_document_id)};
+
+ // Filters for the "person" type should also include the "artist" type.
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = {"person"};
+ DocHitInfoIteratorFilter filtered_iterator_1(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_1),
+ ElementsAre(person_document_id, artist_document_id));
+
+ // Filters for the "artist" type should not include the "person" type.
+ original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = {"artist"};
+ DocHitInfoIteratorFilter filtered_iterator_2(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_2),
+ ElementsAre(artist_document_id));
+}
+
+// Checks schema type filtering for a type with two parents: an "emailMessage"
+// document is expected to match a filter on "email" and a filter on "message",
+// and nothing here is expected to match a filter on "person".
+TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
+ FilterForSchemaTypeMultipleParentPolymorphismOk) {
+ // Create an email and a message document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "message")
+ .SetSchema("message")
+ .Build()));
+
+ // Create an emailMessage document, which should be able to be interpreted
+ // as both an email and a message by polymorphism.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId email_message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "emailMessage")
+ .SetSchema("emailMessage")
+ .Build()));
+
+ // Every filtered iterator below is fed this same list of hits.
+ std::vector<DocHitInfo> doc_hit_infos = {
+ DocHitInfo(email_document_id), DocHitInfo(message_document_id),
+ DocHitInfo(email_message_document_id)};
+
+ // Filters for the "email" type should also include the "emailMessage" type.
+ std::unique_ptr<DocHitInfoIterator> original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = std::vector<std::string_view>{"email"};
+ DocHitInfoIteratorFilter filtered_iterator_1(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_1),
+ ElementsAre(email_document_id, email_message_document_id));
+
+ // Filters for the "message" type should also include the "emailMessage" type.
+ original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = std::vector<std::string_view>{"message"};
+ DocHitInfoIteratorFilter filtered_iterator_2(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_2),
+ ElementsAre(message_document_id, email_message_document_id));
+
+ // Filters for an irrelevant type should return nothing.
+ original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ options_.schema_types = std::vector<std::string_view>{"person"};
+ DocHitInfoIteratorFilter filtered_iterator_3(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
+ EXPECT_THAT(GetDocumentIds(&filtered_iterator_3), IsEmpty());
+}
+
class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test {
protected:
DocHitInfoIteratorExpirationFilterTest()
@@ -542,7 +670,9 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -742,7 +872,9 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc
new file mode 100644
index 0000000..5f260a8
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc
@@ -0,0 +1,114 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+// Wraps `delegate` in a property-in-schema filter.
+//
+// `delegate` is moved into the new iterator. `document_store` and
+// `schema_store` are dereferenced here and kept as references, so neither may
+// be null. `target_sections` is moved into target_properties_.
+DocHitInfoIteratorPropertyInSchema::DocHitInfoIteratorPropertyInSchema(
+    std::unique_ptr<DocHitInfoIterator> delegate,
+    const DocumentStore* document_store, const SchemaStore* schema_store,
+    std::set<std::string> target_sections)
+    : delegate_(std::move(delegate)),
+      document_store_(*document_store),
+      schema_store_(*schema_store),
+      target_properties_(std::move(target_sections)) {
+}
+
+// Advances the delegate until it yields a hit whose document has filter data
+// and whose schema type defines at least one entry of target_properties_.
+// That hit (and the delegate's intersect section mask) is then exposed through
+// doc_hit_info_ / hit_intersect_section_ids_mask_.
+//
+// Returns:
+//   - OK when a matching hit was found.
+//   - RESOURCE_EXHAUSTED once the delegate's Advance() stops returning OK.
+libtextclassifier3::Status DocHitInfoIteratorPropertyInSchema::Advance() {
+  // Reset the exposed state first so an unsuccessful call leaves no stale hit.
+  hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+  doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+
+  // Memo for this call only: SchemaTypeId -> whether the type defines any of
+  // the requested properties.
+  std::unordered_map<SchemaTypeId, bool> type_verdicts;
+
+  // Looks the type up in the memo, computing and recording the answer on a
+  // miss.
+  auto type_defines_target = [this, &type_verdicts](SchemaTypeId type_id) {
+    auto cached = type_verdicts.find(type_id);
+    if (cached != type_verdicts.end()) {
+      return cached->second;
+    }
+    bool defined = false;
+    for (const auto& property_name : target_properties_) {
+      if (schema_store_.IsPropertyDefinedInSchema(type_id, property_name)) {
+        defined = true;
+        break;
+      }
+    }
+    type_verdicts[type_id] = defined;
+    return defined;
+  };
+
+  while (delegate_->Advance().ok()) {
+    DocumentId candidate_id = delegate_->doc_hit_info().document_id();
+    auto filter_data =
+        document_store_.GetAliveDocumentFilterData(candidate_id);
+    if (!filter_data) {
+      // Could not retrieve filter data for this hit; skip it.
+      continue;
+    }
+
+    // filter_data is known to hold a value from here on.
+    if (!type_defines_target(filter_data.value().schema_type_id())) {
+      // The document's schema defines none of target_properties_; keep going.
+      continue;
+    }
+
+    // Pass the delegate's hit and section mask through unchanged.
+    doc_hit_info_ = delegate_->doc_hit_info();
+    hit_intersect_section_ids_mask_ =
+        delegate_->hit_intersect_section_ids_mask();
+    doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_);
+    return libtextclassifier3::Status::OK;
+  }
+
+  // The delegate has nothing (more) to offer.
+  return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
+}
+
+// Trimming is not supported for this iterator: always returns
+// INVALID_ARGUMENT, so no suggestion is generated when this custom function is
+// the last operator of the query.
+libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
+DocHitInfoIteratorPropertyInSchema::TrimRightMostNode() && {
+  libtextclassifier3::Status unsupported = absl_ports::InvalidArgumentError(
+      "Cannot generate suggestion if the last term is hasPropertyDefined().");
+  return unsupported;
+}
+
+// Forwards to the delegate; this iterator keeps no block count of its own.
+int32_t DocHitInfoIteratorPropertyInSchema::GetNumBlocksInspected() const {
+  const int32_t delegate_blocks = delegate_->GetNumBlocksInspected();
+  return delegate_blocks;
+}
+
+// Forwards to the delegate; this iterator keeps no advance count of its own.
+int32_t DocHitInfoIteratorPropertyInSchema::GetNumLeafAdvanceCalls() const {
+  const int32_t delegate_calls = delegate_->GetNumLeafAdvanceCalls();
+  return delegate_calls;
+}
+
+// Renders as "(<comma-separated target properties>): <delegate ToString()>".
+std::string DocHitInfoIteratorPropertyInSchema::ToString() const {
+  const std::string joined_properties =
+      absl_ports::StrJoin(target_properties_, ",");
+  return absl_ports::StrCat("(", joined_properties,
+                            "): ", delegate_->ToString());
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h
new file mode 100644
index 0000000..35b87e1
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h
@@ -0,0 +1,76 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+// An iterator that helps filter for DocHitInfos whose schemas define the
+// properties named in target_properties_.
+class DocHitInfoIteratorPropertyInSchema : public DocHitInfoIterator {
+ public:
+  // Takes ownership of delegate. Does not take ownership of document_store or
+  // schema_store; both pointers must refer to valid objects that outlive the
+  // constructed iterator. The delegate should at minimum be a
+  // DocHitInfoIteratorAllDocumentId, but other optimizations are possible,
+  // cf. go/icing-property-in-schema-existence.
+  //
+  // target_sections holds the names of the properties to look for; it is
+  // stored as target_properties_.
+  explicit DocHitInfoIteratorPropertyInSchema(
+      std::unique_ptr<DocHitInfoIterator> delegate,
+      const DocumentStore* document_store, const SchemaStore* schema_store,
+      std::set<std::string> target_sections);
+
+  libtextclassifier3::Status Advance() override;
+
+  libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
+
+  int32_t GetNumBlocksInspected() const override;
+
+  int32_t GetNumLeafAdvanceCalls() const override;
+
+  std::string ToString() const override;
+
+  // Forwards to the delegate, but only while this iterator currently points at
+  // a valid hit; otherwise matched_terms_stats is left untouched.
+  void PopulateMatchedTermsStats(
+      std::vector<TermMatchInfo>* matched_terms_stats,
+      SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+    if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+      // Current hit isn't valid, return.
+      return;
+    }
+    delegate_->PopulateMatchedTermsStats(matched_terms_stats,
+                                         filtering_section_mask);
+  }
+
+ private:
+  // Owned source of candidate hits.
+  std::unique_ptr<DocHitInfoIterator> delegate_;
+  // Not owned.
+  const DocumentStore& document_store_;
+  // Not owned.
+  const SchemaStore& schema_store_;
+
+  // Names of the properties to look for in each hit's schema type.
+  std::set<std::string> target_properties_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
new file mode 100644
index 0000000..9bffeeb
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
@@ -0,0 +1,263 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+// Fixture providing a SchemaStore with two types and an empty DocumentStore,
+// both rooted in a temporary directory:
+//   - "email": indexed string property indexed_section_0 and unindexed string
+//     property unindexed_section_1.
+//   - "note": unindexed string property unindexed_section_1 only.
+// not_defined_section_2 is a property name that neither type defines.
+class DocHitInfoIteratorPropertyInSchemaTest : public ::testing::Test {
+ protected:
+  DocHitInfoIteratorPropertyInSchemaTest()
+      : test_dir_(GetTestTempDir() + "/icing") {}
+
+  void SetUp() override {
+    filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+
+    // Property names used by the schema below and by the tests.
+    indexed_section_0 = "indexedSection0";
+    unindexed_section_1 = "unindexedSection1";
+    not_defined_section_2 = "notDefinedSection2";
+
+    // One document per schema type; neither sets any property value.
+    document1_ = DocumentBuilder()
+                     .SetKey("namespace", "uri1")
+                     .SetSchema("email")
+                     .Build();
+    document2_ =
+        DocumentBuilder().SetKey("namespace", "uri2").SetSchema("note").Build();
+
+    schema_ =
+        SchemaBuilder()
+            .AddType(
+                SchemaTypeConfigBuilder()
+                    .SetType("email")
+                    // Indexed, so that section metadata is generated for it.
+                    .AddProperty(PropertyConfigBuilder()
+                                     .SetName(indexed_section_0)
+                                     .SetDataTypeString(TERM_MATCH_EXACT,
+                                                        TOKENIZER_PLAIN)
+                                     .SetCardinality(CARDINALITY_OPTIONAL))
+                    .AddProperty(PropertyConfigBuilder()
+                                     .SetName(unindexed_section_1)
+                                     .SetDataType(TYPE_STRING)
+                                     .SetCardinality(CARDINALITY_OPTIONAL)))
+            .AddType(SchemaTypeConfigBuilder().SetType("note").AddProperty(
+                PropertyConfigBuilder()
+                    .SetName(unindexed_section_1)
+                    .SetDataType(TYPE_STRING)
+                    .SetCardinality(CARDINALITY_OPTIONAL)))
+            .Build();
+
+    // The schema store must exist and hold the schema before the document
+    // store is created on top of it.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        schema_store_,
+        SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+    ICING_ASSERT_OK(schema_store_->SetSchema(
+        schema_, /*ignore_errors_and_delete_documents=*/false,
+        /*allow_circular_schema_definitions=*/false));
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        DocumentStore::CreateResult create_result,
+        DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+                              schema_store_.get(),
+                              /*force_recovery_and_revalidate_documents=*/false,
+                              /*namespace_id_fingerprint=*/false,
+                              PortableFileBackedProtoLog<
+                                  DocumentWrapper>::kDeflateCompressionLevel,
+                              /*initialize_stats=*/nullptr));
+    document_store_ = std::move(create_result.document_store);
+  }
+
+  void TearDown() override {
+    // Release the stores before wiping the directory they live in.
+    document_store_.reset();
+    schema_store_.reset();
+    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+  }
+
+  std::unique_ptr<SchemaStore> schema_store_;
+  std::unique_ptr<DocumentStore> document_store_;
+  const Filesystem filesystem_;
+  const std::string test_dir_;
+  std::string indexed_section_0;
+  std::string unindexed_section_1;
+  std::string not_defined_section_2;
+  SchemaProto schema_;
+  DocumentProto document1_;
+  DocumentProto document2_;
+  FakeClock fake_clock_;
+};
+
+// An "email" document is returned when filtering on the indexed property that
+// the "email" type defines.
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+       AdvanceToDocumentWithIndexedProperty) {
+  // Populate the DocumentStore's FilterCache with this document's data.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_doc_id,
+                             document_store_->Put(document1_));
+
+  // Feed every document id through the property-in-schema filter.
+  auto all_documents = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+      document_store_->num_documents());
+  DocHitInfoIteratorPropertyInSchema filter(
+      std::move(all_documents), document_store_.get(), schema_store_.get(),
+      /*target_sections=*/{indexed_section_0});
+
+  EXPECT_THAT(GetDocumentIds(&filter), ElementsAre(email_doc_id));
+
+  // The iterator is exhausted afterwards.
+  EXPECT_FALSE(filter.Advance().ok());
+}
+
+// An "email" document is returned when filtering on the unindexed property
+// that the "email" type defines.
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+       AdvanceToDocumentWithUnindexedProperty) {
+  // Populate the DocumentStore's FilterCache with this document's data.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_doc_id,
+                             document_store_->Put(document1_));
+
+  // Feed every document id through the property-in-schema filter.
+  auto all_documents = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+      document_store_->num_documents());
+  DocHitInfoIteratorPropertyInSchema filter(
+      std::move(all_documents), document_store_.get(), schema_store_.get(),
+      /*target_sections=*/{unindexed_section_1});
+
+  EXPECT_THAT(GetDocumentIds(&filter), ElementsAre(email_doc_id));
+
+  // The iterator is exhausted afterwards.
+  EXPECT_FALSE(filter.Advance().ok());
+}
+
+// Filtering on a property name that no type defines yields no hits at all.
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest, NoMatchWithUndefinedProperty) {
+  ICING_EXPECT_OK(document_store_->Put(document1_));
+
+  auto all_documents = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+      document_store_->num_documents());
+  DocHitInfoIteratorPropertyInSchema filter(
+      std::move(all_documents), document_store_.get(), schema_store_.get(),
+      /*target_sections=*/{not_defined_section_2});
+
+  // The very first Advance() already fails.
+  EXPECT_FALSE(filter.Advance().ok());
+}
+
+// The filter passes the delegate's section id mask and matched-term stats
+// through unchanged, and reports no stats before the first Advance().
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+       CorrectlySetsSectionIdMasksAndPopulatesTermMatchInfo) {
+  // Populate the DocumentStore's FilterCache with this document's data.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_doc_id,
+                             document_store_->Put(document1_));
+
+  // One delegate hit with arbitrary section data: term frequency 1 in
+  // section 0 and term frequency 2 in section 2.
+  DocHitInfoTermFrequencyPair hit_with_frequencies = DocHitInfo(email_doc_id);
+  hit_with_frequencies.UpdateSection(/*section_id=*/0,
+                                     /*hit_term_frequency=*/1);
+  hit_with_frequencies.UpdateSection(/*section_id=*/2,
+                                     /*hit_term_frequency=*/2);
+  std::vector<DocHitInfoTermFrequencyPair> delegate_hits = {
+      hit_with_frequencies};
+
+  // Mask reported by the delegate: hits in sections 0 and 2.
+  SectionIdMask delegate_section_mask = 0b00000101;
+  auto dummy_delegate =
+      std::make_unique<DocHitInfoIteratorDummy>(delegate_hits, "hi");
+  dummy_delegate->set_hit_intersect_section_ids_mask(delegate_section_mask);
+
+  DocHitInfoIteratorPropertyInSchema filter(
+      std::move(dummy_delegate), document_store_.get(), schema_store_.get(),
+      /*target_sections=*/{indexed_section_0});
+
+  // Before the first Advance() there is no valid hit, hence no stats.
+  std::vector<TermMatchInfo> matched_terms_stats;
+  filter.PopulateMatchedTermsStats(&matched_terms_stats);
+  EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+  ICING_EXPECT_OK(filter.Advance());
+  EXPECT_THAT(filter.doc_hit_info().document_id(), Eq(email_doc_id));
+
+  // The filter is a pass-through for the mask, so it must equal the
+  // delegate's mask.
+  SectionIdMask expected_section_mask = delegate_section_mask;
+  EXPECT_EQ(filter.hit_intersect_section_ids_mask(), expected_section_mask);
+
+  // With a valid hit, the delegate's term stats are reported.
+  filter.PopulateMatchedTermsStats(&matched_terms_stats);
+  std::unordered_map<SectionId, Hit::TermFrequency> expected_frequencies = {
+      {0, 1}, {2, 2}};
+  EXPECT_THAT(matched_terms_stats,
+              ElementsAre(EqualsTermMatchInfo("hi", expected_frequencies)));
+
+  EXPECT_FALSE(filter.Advance().ok());
+}
+
+// TrimRightMostNode() is unsupported and reports INVALID_ARGUMENT.
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+       TrimRightMostNodeResultsInError) {
+  auto all_documents = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+      document_store_->num_documents());
+  DocHitInfoIteratorPropertyInSchema filter(
+      std::move(all_documents), document_store_.get(), schema_store_.get(),
+      /*target_sections=*/{indexed_section_0});
+
+  // The method is rvalue-qualified, so the iterator has to be moved from.
+  EXPECT_THAT(std::move(filter).TrimRightMostNode(),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// A property defined by both "email" and "note" matches documents of either
+// type.
+TEST_F(DocHitInfoIteratorPropertyInSchemaTest,
+       FindPropertyDefinedByMultipleTypes) {
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_doc_id,
+                             document_store_->Put(document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId note_doc_id,
+                             document_store_->Put(document2_));
+
+  auto all_documents = std::make_unique<DocHitInfoIteratorAllDocumentId>(
+      document_store_->num_documents());
+  DocHitInfoIteratorPropertyInSchema filter(
+      std::move(all_documents), document_store_.get(), schema_store_.get(),
+      /*target_sections=*/{unindexed_section_1});
+
+  // Both documents match; the later-added "note" document comes first.
+  EXPECT_THAT(GetDocumentIds(&filter), ElementsAre(note_doc_id, email_doc_id));
+
+  EXPECT_FALSE(filter.Advance().ok());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index 60b9a12..78f4d34 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -95,7 +95,9 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store_->SetSchema(schema_));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h
index e1f06d0..d8cd3ad 100644
--- a/icing/index/iterator/doc-hit-info-iterator.h
+++ b/icing/index/iterator/doc-hit-info-iterator.h
@@ -85,10 +85,11 @@ class DocHitInfoIterator {
unnormalized_term_length_(unnormalized_term_length) {}
};
- // Trim the right-most itertor of the itertor tree.
- // This is to support search suggestion for the last terms which is the
+ // Trim the rightmost iterator of the iterator tree.
+ // This is to support search suggestions for the last term which is the
// right-most node of the root iterator tree. Only support trim the right-most
- // node on the AND, AND_NARY, OR, OR_NARY, OR_LEAF and Filter itertor.
+ // node on the AND, AND_NARY, OR, OR_NARY, OR_LEAF, Filter, and the
+ // property-in-schema-check iterator.
//
// After calling this method, this iterator is no longer usable. Please use
// the returned iterator.
diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc
index 7df137c..d5e9d57 100644
--- a/icing/index/main/main-index.cc
+++ b/icing/index/main/main-index.cc
@@ -22,6 +22,7 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/destructible-directory.h"
+#include "icing/file/posting_list/flash-index-storage.h"
#include "icing/file/posting_list/posting-list-common.h"
#include "icing/index/main/posting-list-hit-serializer.h"
#include "icing/index/term-id-codec.h"
@@ -90,6 +91,10 @@ FindTermResult FindShortestValidTermWithPrefixHits(
return result;
}
+// Returns the path of the flash index file located directly under base_dir.
+std::string MakeFlashIndexFilename(const std::string& base_dir) {
+  std::string flash_index_path = base_dir;
+  flash_index_path += "/main_index";
+  return flash_index_path;
+}
+
} // namespace
MainIndex::MainIndex(const std::string& index_directory,
@@ -112,12 +117,18 @@ libtextclassifier3::StatusOr<std::unique_ptr<MainIndex>> MainIndex::Create(
return main_index;
}
+// Resolves the flash index file under index_directory and returns whatever
+// FlashIndexStorage::ReadHeaderMagic reports for it.
+/* static */ libtextclassifier3::StatusOr<int> MainIndex::ReadFlashIndexMagic(
+    const Filesystem* filesystem, const std::string& index_directory) {
+  const std::string flash_index_file = MakeFlashIndexFilename(index_directory);
+  return FlashIndexStorage::ReadHeaderMagic(filesystem, flash_index_file);
+}
+
// TODO(b/139087650) : Migrate off of IcingFilesystem.
libtextclassifier3::Status MainIndex::Init() {
if (!filesystem_->CreateDirectoryRecursively(base_dir_.c_str())) {
return absl_ports::InternalError("Unable to create main index directory.");
}
- std::string flash_index_file = base_dir_ + "/main_index";
+ std::string flash_index_file = MakeFlashIndexFilename(base_dir_);
ICING_ASSIGN_OR_RETURN(
FlashIndexStorage flash_index,
FlashIndexStorage::Create(flash_index_file, filesystem_,
diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h
index e181330..9e570d5 100644
--- a/icing/index/main/main-index.h
+++ b/icing/index/main/main-index.h
@@ -48,6 +48,16 @@ class MainIndex {
const std::string& index_directory, const Filesystem* filesystem,
const IcingFilesystem* icing_filesystem);
+ // Reads magic from existing flash index storage file header. We need this
+ // during Icing initialization phase to determine the version.
+ //
+ // RETURNS:
+ // - On success, a valid magic.
+ // - NOT_FOUND if the flash index doesn't exist.
+ // - INTERNAL on I/O error.
+ static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic(
+ const Filesystem* filesystem, const std::string& index_directory);
+
// Get a PostingListHitAccessor that holds the posting list chain for 'term'.
//
// RETURNS:
diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc
index f3901e1..5165040 100644
--- a/icing/index/numeric/integer-index-storage.cc
+++ b/icing/index/numeric/integer-index-storage.cc
@@ -292,12 +292,17 @@ libtextclassifier3::Status IntegerIndexStorageIterator::Advance() {
// Merge sections with same document_id into a single DocHitInfo
while (!pq_.empty() &&
pq_.top()->GetCurrentBasicHit().document_id() == document_id) {
- doc_hit_info_.UpdateSection(pq_.top()->GetCurrentBasicHit().section_id());
-
BucketPostingListIterator* bucket_itr = pq_.top();
pq_.pop();
- if (bucket_itr->AdvanceAndFilter(key_lower_, key_upper_).ok()) {
+ libtextclassifier3::Status advance_status;
+ do {
+ doc_hit_info_.UpdateSection(
+ bucket_itr->GetCurrentBasicHit().section_id());
+ advance_status = bucket_itr->AdvanceAndFilter(key_lower_, key_upper_);
+ } while (advance_status.ok() &&
+ bucket_itr->GetCurrentBasicHit().document_id() == document_id);
+ if (advance_status.ok()) {
pq_.push(bucket_itr);
}
}
diff --git a/icing/index/numeric/integer-index-storage_benchmark.cc b/icing/index/numeric/integer-index-storage_benchmark.cc
index 54b19c3..27f35d9 100644
--- a/icing/index/numeric/integer-index-storage_benchmark.cc
+++ b/icing/index/numeric/integer-index-storage_benchmark.cc
@@ -12,22 +12,30 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <algorithm>
#include <cstdint>
+#include <limits>
#include <memory>
#include <string>
#include <unordered_map>
+#include <utility>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "testing/base/public/benchmark.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/destructible-directory.h"
#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/integer-index-storage.h"
#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/numeric/normal-distribution-number-generator.h"
#include "icing/testing/numeric/number-generator.h"
#include "icing/testing/numeric/uniform-distribution-integer-generator.h"
#include "icing/testing/tmp-directory.h"
@@ -65,6 +73,7 @@ static constexpr int kDefaultSeed = 12345;
enum DistributionTypeEnum {
kUniformDistribution,
+ kNormalDistribution,
};
class IntegerIndexStorageBenchmark {
@@ -103,6 +112,19 @@ CreateIntegerGenerator(DistributionTypeEnum distribution_type, int seed,
return std::make_unique<UniformDistributionIntegerGenerator<int64_t>>(
seed, /*range_lower=*/0,
/*range_upper=*/static_cast<int64_t>(num_keys) * 10 - 1);
+ case DistributionTypeEnum::kNormalDistribution:
+ // Normal distribution with mean = 0 and stddev = num_keys / 1024.
+ // - keys in range [-1 * stddev, 1 * stddev]: 68.2%
+ // - keys in range [-2 * stddev, 2 * stddev]: 95.4%
+ // - keys in range [-3 * stddev, 3 * stddev]: 99.7%
+ //
+ // - When generating num_keys integers, 68.2% of them will be in range
+ // [-num_keys / 1024, num_keys / 1024]
+ // - Each number in this range will be sampled (num_keys * 0.682) /
+ // ((num_keys / 1024) * 2) = 349 times on average and become
+ // "single-range bucket".
+ return std::make_unique<NormalDistributionNumberGenerator<int64_t>>(
+ seed, /*mean=*/0.0, /*stddev=*/num_keys / 1024.0);
default:
return absl_ports::InvalidArgumentError("Unknown type");
}
@@ -155,7 +177,18 @@ BENCHMARK(BM_Index)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
- ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20);
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
void BM_BatchIndex(benchmark::State& state) {
DistributionTypeEnum distribution_type =
@@ -203,7 +236,18 @@ BENCHMARK(BM_BatchIndex)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
- ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20);
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
void BM_ExactQuery(benchmark::State& state) {
DistributionTypeEnum distribution_type =
@@ -269,7 +313,81 @@ BENCHMARK(BM_ExactQuery)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
- ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20);
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+ ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
+
+// Benchmarks a range query over the whole int64_t key space, which therefore
+// matches every indexed key.
+//
+//   state.range(0): DistributionTypeEnum used to generate the keys.
+//   state.range(1): number of keys to index; each key goes to its own
+//                   document id (0 .. num_keys - 1).
+void BM_RangeQueryAll(benchmark::State& state) {
+  int num_keys = state.range(1);
+  DistributionTypeEnum distribution_type =
+      static_cast<DistributionTypeEnum>(state.range(0));
+
+  // Start from a clean working directory; ddir removes it again on exit.
+  IntegerIndexStorageBenchmark benchmark;
+  benchmark.filesystem.DeleteDirectoryRecursively(
+      benchmark.working_path.c_str());
+  DestructibleDirectory ddir(&benchmark.filesystem, benchmark.working_path);
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<IntegerIndexStorage> index_storage,
+      IntegerIndexStorage::Create(benchmark.filesystem, benchmark.working_path,
+                                  IntegerIndexStorage::Options(),
+                                  &benchmark.posting_list_serializer));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<NumberGenerator<int64_t>> key_generator,
+      CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
+
+  // Index one generated key per document (setup, not timed).
+  for (int doc = 0; doc < num_keys; ++doc) {
+    ICING_ASSERT_OK(index_storage->AddKeys(static_cast<DocumentId>(doc),
+                                           kDefaultSectionId,
+                                           {key_generator->Generate()}));
+  }
+  ICING_ASSERT_OK(index_storage->PersistToDisk());
+
+  for (auto _ : state) {
+    // Query [INT64_MIN, INT64_MAX] and drain the resulting iterator.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<DocHitInfoIterator> hit_iterator,
+        index_storage->GetIterator(
+            /*query_key_lower=*/std::numeric_limits<int64_t>::min(),
+            /*query_key_upper=*/std::numeric_limits<int64_t>::max()));
+    std::vector<DocHitInfo> hits;
+    while (hit_iterator->Advance().ok()) {
+      hits.push_back(hit_iterator->doc_hit_info());
+    }
+
+    // Every document must have been returned exactly once.
+    ASSERT_THAT(hits, SizeIs(num_keys));
+  }
+}
+// Run for both key distributions with 2^10 .. 2^20 keys.
+BENCHMARK(BM_RangeQueryAll)
+    ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10)
+    ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11)
+    ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12)
+    ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13)
+    ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14)
+    ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15)
+    ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16)
+    ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17)
+    ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18)
+    ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19)
+    ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20)
+    ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10)
+    ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11)
+    ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12)
+    ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13)
+    ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14)
+    ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15)
+    ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16)
+    ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17)
+    ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18)
+    ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19)
+    ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20);
} // namespace
diff --git a/icing/index/numeric/integer-index_test.cc b/icing/index/numeric/integer-index_test.cc
index ec7f55b..92433e1 100644
--- a/icing/index/numeric/integer-index_test.cc
+++ b/icing/index/numeric/integer-index_test.cc
@@ -389,7 +389,10 @@ TYPED_TEST(NumericIndexIntegerTest, WildcardStorageQuery) {
.AddProperty(PropertyConfigBuilder(int_property_config)
.SetName("desiredProperty")))
.Build();
- ICING_ASSERT_OK(this->schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Put 11 docs of "TypeA" into the document store.
DocumentProto doc =
@@ -1492,7 +1495,10 @@ TEST_F(IntegerIndexTest, WildcardStoragePersistenceQuery) {
.AddProperty(PropertyConfigBuilder(int_property_config)
.SetName("desiredProperty")))
.Build();
- ICING_ASSERT_OK(this->schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Ids are assigned alphabetically, so the property ids are:
// TypeA.desiredProperty = 0
@@ -1862,7 +1868,10 @@ TEST_F(IntegerIndexTest, WildcardStorageWorksAfterOptimize) {
.AddProperty(PropertyConfigBuilder(int_property_config)
.SetName("desiredProperty")))
.Build();
- ICING_ASSERT_OK(this->schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Ids are assigned alphabetically, so the property ids are:
// TypeA.desiredProperty = 0
@@ -2145,7 +2154,10 @@ TEST_F(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) {
.AddProperty(PropertyConfigBuilder(int_property_config)
.SetName("undesiredProperty")))
.Build();
- ICING_ASSERT_OK(this->schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(this->schema_store_->SetSchema(
+ schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Ids are assigned alphabetically, so the property ids are:
// TypeA.desiredProperty = 0
diff --git a/icing/index/string-section-indexing-handler.cc b/icing/index/string-section-indexing-handler.cc
index a992568..69b8889 100644
--- a/icing/index/string-section-indexing-handler.cc
+++ b/icing/index/string-section-indexing-handler.cc
@@ -30,6 +30,8 @@
#include "icing/store/document-id.h"
#include "icing/transform/normalizer.h"
#include "icing/util/clock.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
namespace icing {
@@ -121,7 +123,8 @@ libtextclassifier3::Status StringSectionIndexingHandler::Handle(
}
if (put_document_stats != nullptr) {
- // TODO(b/259744228): set term index latency.
+ put_document_stats->set_term_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
put_document_stats->mutable_tokenization_stats()->set_num_tokens_indexed(
num_tokens);
}
diff --git a/icing/join/join-processor.cc b/icing/join/join-processor.cc
index da0e5d2..d68ec98 100644
--- a/icing/join/join-processor.cc
+++ b/icing/join/join-processor.cc
@@ -15,20 +15,27 @@
#include "icing/join/join-processor.h"
#include <algorithm>
-#include <functional>
+#include <memory>
+#include <optional>
#include <string>
#include <string_view>
+#include <unordered_map>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/join/aggregation-scorer.h"
+#include "icing/join/doc-join-info.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/join/qualified-id.h"
+#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/schema/joinable-property.h"
#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/util/status-macros.h"
@@ -76,13 +83,7 @@ JoinProcessor::GetChildrenFetcher(
continue;
}
- // Since we've already sorted child_scored_document_hits, just simply omit
- // if the map_joinable_qualified_id[parent_doc_id].size() has reached max
- // joined child count.
- if (map_joinable_qualified_id[ref_doc_id].size() <
- join_spec.max_joined_child_count()) {
- map_joinable_qualified_id[ref_doc_id].push_back(child);
- }
+ map_joinable_qualified_id[ref_doc_id].push_back(child);
}
return JoinChildrenFetcher(join_spec, std::move(map_joinable_qualified_id));
}
diff --git a/icing/join/join-processor_test.cc b/icing/join/join-processor_test.cc
index 67b6201..ec92349 100644
--- a/icing/join/join-processor_test.cc
+++ b/icing/join/join-processor_test.cc
@@ -16,15 +16,20 @@
#include <memory>
#include <string>
+#include <utility>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
-#include "icing/join/qualified-id-joinable-property-indexing-handler.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/join/qualified-id-type-joinable-index.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
@@ -33,6 +38,7 @@
#include "icing/schema/section.h"
#include "icing/scoring/scored-document-hit.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
@@ -103,7 +109,10 @@ class JoinProcessorTest : public ::testing::Test {
JOINABLE_VALUE_TYPE_QUALIFIED_ID)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ASSERT_THAT(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()),
IsTrue());
@@ -140,9 +149,9 @@ class JoinProcessorTest : public ::testing::Test {
document));
ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler,
- QualifiedIdJoinablePropertyIndexingHandler::Create(
- &fake_clock_, qualified_id_join_index_.get()));
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
ICING_RETURN_IF_ERROR(handler->Handle(tokenized_document, document_id,
/*recovery_mode=*/false,
/*put_document_stats=*/nullptr));
@@ -244,7 +253,6 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) {
scored_doc_hit5, scored_doc_hit4, scored_doc_hit3};
JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(100);
join_spec.set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec.set_child_property_expression("sender");
@@ -313,7 +321,6 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) {
scored_doc_hit3};
JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(100);
join_spec.set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec.set_child_property_expression("sender");
@@ -394,7 +401,6 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) {
scored_doc_hit2, scored_doc_hit3, scored_doc_hit4};
JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(100);
join_spec.set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec.set_child_property_expression("sender");
@@ -459,7 +465,6 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) {
std::vector<ScoredDocumentHit> child_scored_document_hits = {scored_doc_hit3};
JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(100);
join_spec.set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec.set_child_property_expression("sender");
@@ -541,7 +546,6 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
scored_doc_hit2, scored_doc_hit3, scored_doc_hit4};
JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(100);
join_spec.set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec.set_child_property_expression("sender");
@@ -564,114 +568,6 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) {
{scored_doc_hit3, scored_doc_hit4, scored_doc_hit2}))));
}
-TEST_F(JoinProcessorTest,
- ShouldTruncateByRankingStrategyIfExceedingMaxJoinedChildCount) {
- DocumentProto person1 = DocumentBuilder()
- .SetKey("pkg$db/namespace", "person1")
- .SetSchema("Person")
- .AddStringProperty("Name", "Alice")
- .Build();
- DocumentProto person2 = DocumentBuilder()
- .SetKey(R"(pkg$db/name#space\\)", "person2")
- .SetSchema("Person")
- .AddStringProperty("Name", "Bob")
- .Build();
-
- DocumentProto email1 =
- DocumentBuilder()
- .SetKey("pkg$db/namespace", "email1")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 1")
- .AddStringProperty("sender", "pkg$db/namespace#person1")
- .Build();
- DocumentProto email2 =
- DocumentBuilder()
- .SetKey("pkg$db/namespace", "email2")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 2")
- .AddStringProperty("sender", "pkg$db/namespace#person1")
- .Build();
- DocumentProto email3 =
- DocumentBuilder()
- .SetKey("pkg$db/namespace", "email3")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 3")
- .AddStringProperty("sender", "pkg$db/namespace#person1")
- .Build();
- DocumentProto email4 =
- DocumentBuilder()
- .SetKey("pkg$db/namespace", "email4")
- .SetSchema("Email")
- .AddStringProperty("subject", "test subject 4")
- .AddStringProperty("sender",
- R"(pkg$db/name\#space\\\\#person2)") // escaped
- .Build();
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- PutAndIndexDocument(person1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
- PutAndIndexDocument(person2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
- PutAndIndexDocument(email1));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
- PutAndIndexDocument(email2));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
- PutAndIndexDocument(email3));
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6,
- PutAndIndexDocument(email4));
-
- ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone,
- /*score=*/0.0);
- ScoredDocumentHit scored_doc_hit2(document_id2, kSectionIdMaskNone,
- /*score=*/0.0);
- ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone,
- /*score=*/2.0);
- ScoredDocumentHit scored_doc_hit4(document_id4, kSectionIdMaskNone,
- /*score=*/5.0);
- ScoredDocumentHit scored_doc_hit5(document_id5, kSectionIdMaskNone,
- /*score=*/3.0);
- ScoredDocumentHit scored_doc_hit6(document_id6, kSectionIdMaskNone,
- /*score=*/1.0);
-
- // Parent ScoredDocumentHits: all Person documents
- std::vector<ScoredDocumentHit> parent_scored_document_hits = {
- scored_doc_hit1, scored_doc_hit2};
-
- // Child ScoredDocumentHits: all Email documents
- std::vector<ScoredDocumentHit> child_scored_document_hits = {
- scored_doc_hit3, scored_doc_hit4, scored_doc_hit5, scored_doc_hit6};
-
- JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(2);
- join_spec.set_parent_property_expression(
- std::string(JoinProcessor::kQualifiedIdExpr));
- join_spec.set_child_property_expression("sender");
- join_spec.set_aggregation_scoring_strategy(
- JoinSpecProto::AggregationScoringStrategy::COUNT);
- join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by(
- ScoringSpecProto::Order::DESC);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<JoinedScoredDocumentHit> joined_result_document_hits,
- Join(join_spec, std::move(parent_scored_document_hits),
- std::move(child_scored_document_hits)));
- // Since we set max_joind_child_count as 2 and use DESC as the (nested)
- // ranking strategy, parent document with # of child documents more than 2
- // should only keep 2 child documents with higher scores and the rest should
- // be truncated.
- EXPECT_THAT(
- joined_result_document_hits,
- ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
- /*final_score=*/2.0,
- /*parent_scored_document_hit=*/scored_doc_hit1,
- /*child_scored_document_hits=*/
- {scored_doc_hit4, scored_doc_hit5})),
- EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit(
- /*final_score=*/1.0,
- /*parent_scored_document_hit=*/scored_doc_hit2,
- /*child_scored_document_hits=*/{scored_doc_hit6}))));
-}
-
TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
DocumentProto email1 =
DocumentBuilder()
@@ -695,7 +591,6 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) {
std::vector<ScoredDocumentHit> child_scored_document_hits = {scored_doc_hit1};
JoinSpecProto join_spec;
- join_spec.set_max_joined_child_count(100);
join_spec.set_parent_property_expression(
std::string(JoinProcessor::kQualifiedIdExpr));
join_spec.set_child_property_expression("sender");
diff --git a/icing/join/qualified-id-joinable-property-indexing-handler.cc b/icing/join/qualified-id-join-indexing-handler.cc
index 150b23b..86af043 100644
--- a/icing/join/qualified-id-joinable-property-indexing-handler.cc
+++ b/icing/join/qualified-id-join-indexing-handler.cc
@@ -12,42 +12,44 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/join/qualified-id-joinable-property-indexing-handler.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
#include <memory>
#include <string_view>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/join/doc-join-info.h"
#include "icing/join/qualified-id-type-joinable-index.h"
#include "icing/join/qualified-id.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/logging.pb.h"
+#include "icing/schema/joinable-property.h"
#include "icing/store/document-id.h"
+#include "icing/util/clock.h"
#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
#include "icing/util/tokenized-document.h"
namespace icing {
namespace lib {
/* static */ libtextclassifier3::StatusOr<
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>>
-QualifiedIdJoinablePropertyIndexingHandler::Create(
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>>
+QualifiedIdJoinIndexingHandler::Create(
const Clock* clock, QualifiedIdTypeJoinableIndex* qualified_id_join_index) {
ICING_RETURN_ERROR_IF_NULL(clock);
ICING_RETURN_ERROR_IF_NULL(qualified_id_join_index);
- return std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>(
- new QualifiedIdJoinablePropertyIndexingHandler(clock,
- qualified_id_join_index));
+ return std::unique_ptr<QualifiedIdJoinIndexingHandler>(
+ new QualifiedIdJoinIndexingHandler(clock, qualified_id_join_index));
}
-libtextclassifier3::Status QualifiedIdJoinablePropertyIndexingHandler::Handle(
+libtextclassifier3::Status QualifiedIdJoinIndexingHandler::Handle(
const TokenizedDocument& tokenized_document, DocumentId document_id,
bool recovery_mode, PutDocumentStatsProto* put_document_stats) {
- // TODO(b/263890397): set qualified id join index processing latency and other
- // stats.
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
if (!IsDocumentIdValid(document_id)) {
return absl_ports::InvalidArgumentError(
@@ -94,6 +96,11 @@ libtextclassifier3::Status QualifiedIdJoinablePropertyIndexingHandler::Handle(
}
}
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_qualified_id_join_index_latency_ms(
+ index_timer->GetElapsedMilliseconds());
+ }
+
return libtextclassifier3::Status::OK;
}
diff --git a/icing/join/qualified-id-joinable-property-indexing-handler.h b/icing/join/qualified-id-join-indexing-handler.h
index 0265874..434403e 100644
--- a/icing/join/qualified-id-joinable-property-indexing-handler.h
+++ b/icing/join/qualified-id-join-indexing-handler.h
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_JOIN_QUALIFIED_ID_JOINABLE_PROPERTY_INDEXING_HANDLER_H_
-#define ICING_JOIN_QUALIFIED_ID_JOINABLE_PROPERTY_INDEXING_HANDLER_H_
+#ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_
+#define ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/index/data-indexing-handler.h"
@@ -26,22 +26,21 @@
namespace icing {
namespace lib {
-class QualifiedIdJoinablePropertyIndexingHandler : public DataIndexingHandler {
+class QualifiedIdJoinIndexingHandler : public DataIndexingHandler {
public:
- // Creates a QualifiedIdJoinablePropertyIndexingHandler instance which does
- // not take ownership of any input components. All pointers must refer to
- // valid objects that outlive the created
- // QualifiedIdJoinablePropertyIndexingHandler instance.
+ // Creates a QualifiedIdJoinIndexingHandler instance which does not take
+ // ownership of any input components. All pointers must refer to valid objects
+ // that outlive the created QualifiedIdJoinIndexingHandler instance.
//
// Returns:
- // - A QualifiedIdJoinablePropertyIndexingHandler instance on success
+ // - A QualifiedIdJoinIndexingHandler instance on success
// - FAILED_PRECONDITION_ERROR if any of the input pointer is null
static libtextclassifier3::StatusOr<
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>>
+ std::unique_ptr<QualifiedIdJoinIndexingHandler>>
Create(const Clock* clock,
QualifiedIdTypeJoinableIndex* qualified_id_join_index);
- ~QualifiedIdJoinablePropertyIndexingHandler() override = default;
+ ~QualifiedIdJoinIndexingHandler() override = default;
// Handles the joinable qualified id data indexing process: add data into the
// qualified id type joinable cache.
@@ -58,7 +57,7 @@ class QualifiedIdJoinablePropertyIndexingHandler : public DataIndexingHandler {
bool recovery_mode, PutDocumentStatsProto* put_document_stats) override;
private:
- explicit QualifiedIdJoinablePropertyIndexingHandler(
+ explicit QualifiedIdJoinIndexingHandler(
const Clock* clock, QualifiedIdTypeJoinableIndex* qualified_id_join_index)
: DataIndexingHandler(clock),
qualified_id_join_index_(*qualified_id_join_index) {}
@@ -69,4 +68,4 @@ class QualifiedIdJoinablePropertyIndexingHandler : public DataIndexingHandler {
} // namespace lib
} // namespace icing
-#endif // ICING_JOIN_QUALIFIED_ID_JOINABLE_PROPERTY_INDEXING_HANDLER_H_
+#endif // ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_
diff --git a/icing/join/qualified-id-joinable-property-indexing-handler_test.cc b/icing/join/qualified-id-join-indexing-handler_test.cc
index 846520e..daddc4c 100644
--- a/icing/join/qualified-id-joinable-property-indexing-handler_test.cc
+++ b/icing/join/qualified-id-join-indexing-handler_test.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/join/qualified-id-joinable-property-indexing-handler.h"
+#include "icing/join/qualified-id-join-indexing-handler.h"
#include <memory>
#include <string>
@@ -73,7 +73,7 @@ static constexpr JoinablePropertyId kQualifiedId2JoinablePropertyId = 1;
static constexpr DocumentId kDefaultDocumentId = 3;
-class QualifiedIdJoinablePropertyIndexingHandlerTest : public ::testing::Test {
+class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test {
protected:
void SetUp() override {
if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
@@ -135,7 +135,9 @@ class QualifiedIdJoinablePropertyIndexingHandlerTest : public ::testing::Test {
JOINABLE_VALUE_TYPE_QUALIFIED_ID)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
@@ -157,18 +159,17 @@ class QualifiedIdJoinablePropertyIndexingHandlerTest : public ::testing::Test {
std::unique_ptr<SchemaStore> schema_store_;
};
-TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
- CreationWithNullPointerShouldFail) {
- EXPECT_THAT(QualifiedIdJoinablePropertyIndexingHandler::Create(
+TEST_F(QualifiedIdJoinIndexingHandlerTest, CreationWithNullPointerShouldFail) {
+ EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create(
/*clock=*/nullptr, qualified_id_join_index_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(QualifiedIdJoinablePropertyIndexingHandler::Create(
+ EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create(
&fake_clock_, /*qualified_id_join_index=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
-TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, HandleJoinableProperty) {
+TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleJoinableProperty) {
DocumentProto referenced_document =
DocumentBuilder()
.SetKey("pkg$db/ns", "ref_type/1")
@@ -192,9 +193,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, HandleJoinableProperty) {
Eq(kInvalidDocumentId));
// Handle document.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler,
- QualifiedIdJoinablePropertyIndexingHandler::Create(
- &fake_clock_, qualified_id_join_index_.get()));
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
EXPECT_THAT(
handler->Handle(tokenized_document, kDefaultDocumentId,
/*recovery_mode=*/false, /*put_document_stats=*/nullptr),
@@ -207,8 +208,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, HandleJoinableProperty) {
IsOkAndHolds("pkg$db/ns#ref_type/1"));
}
-TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
- HandleNestedJoinableProperty) {
+TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleNestedJoinableProperty) {
DocumentProto referenced_document1 =
DocumentBuilder()
.SetKey("pkg$db/ns", "ref_type/1")
@@ -246,9 +246,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
Eq(kInvalidDocumentId));
// Handle nested_document.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler,
- QualifiedIdJoinablePropertyIndexingHandler::Create(
- &fake_clock_, qualified_id_join_index_.get()));
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
EXPECT_THAT(handler->Handle(tokenized_document, kDefaultDocumentId,
/*recovery_mode=*/false,
/*put_document_stats=*/nullptr),
@@ -264,7 +264,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
IsOkAndHolds("pkg$db/ns#ref_type/1"));
}
-TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
HandleShouldSkipInvalidFormatQualifiedId) {
static constexpr std::string_view kInvalidFormatQualifiedId =
"invalid_format_qualified_id";
@@ -289,9 +289,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
// Index data should remain unchanged since there is no valid qualified id,
// but last_added_document_id should be updated.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler,
- QualifiedIdJoinablePropertyIndexingHandler::Create(
- &fake_clock_, qualified_id_join_index_.get()));
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
EXPECT_THAT(
handler->Handle(tokenized_document, kDefaultDocumentId,
/*recovery_mode=*/false, /*put_document_stats=*/nullptr),
@@ -303,8 +303,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
- HandleShouldSkipEmptyQualifiedId) {
+TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleShouldSkipEmptyQualifiedId) {
// Create a document without any qualified id.
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/1")
@@ -321,9 +320,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
// Handle document. Index data should remain unchanged since there is no
// qualified id, but last_added_document_id should be updated.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler,
- QualifiedIdJoinablePropertyIndexingHandler::Create(
- &fake_clock_, qualified_id_join_index_.get()));
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
EXPECT_THAT(
handler->Handle(tokenized_document, kDefaultDocumentId,
/*recovery_mode=*/false, /*put_document_stats=*/nullptr),
@@ -335,7 +334,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
HandleInvalidDocumentIdShouldReturnInvalidArgumentError) {
DocumentProto referenced_document =
DocumentBuilder()
@@ -361,9 +360,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
Eq(kDefaultDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler,
- QualifiedIdJoinablePropertyIndexingHandler::Create(
- &fake_clock_, qualified_id_join_index_.get()));
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
// Handling document with kInvalidDocumentId should cause a failure, and both
// index data and last_added_document_id should remain unchanged.
@@ -389,7 +388,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) {
DocumentProto referenced_document =
DocumentBuilder()
@@ -415,9 +414,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
Eq(kDefaultDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler,
- QualifiedIdJoinablePropertyIndexingHandler::Create(
- &fake_clock_, qualified_id_join_index_.get()));
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
// Handling document with document_id < last_added_document_id should cause a
// failure, and both index data and last_added_document_id should remain
@@ -447,7 +446,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
+TEST_F(QualifiedIdJoinIndexingHandlerTest,
HandleRecoveryModeShouldIgnoreDocsLELastAddedDocId) {
DocumentProto referenced_document =
DocumentBuilder()
@@ -473,9 +472,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest,
Eq(kDefaultDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler,
- QualifiedIdJoinablePropertyIndexingHandler::Create(
- &fake_clock_, qualified_id_join_index_.get()));
+ std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
+ QualifiedIdJoinIndexingHandler::Create(&fake_clock_,
+ qualified_id_join_index_.get()));
// Handle document with document_id < last_added_document_id in recovery mode.
// We should not get any error, but the handler should ignore the document, so
diff --git a/icing/query/advanced_query_parser/query-visitor.cc b/icing/query/advanced_query_parser/query-visitor.cc
index c2cee47..664b072 100644
--- a/icing/query/advanced_query_parser/query-visitor.cc
+++ b/icing/query/advanced_query_parser/query-visitor.cc
@@ -33,6 +33,7 @@
#include "icing/index/iterator/doc-hit-info-iterator-none.h"
#include "icing/index/iterator/doc-hit-info-iterator-not.h"
#include "icing/index/iterator/doc-hit-info-iterator-or.h"
+#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h"
#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/query/advanced_query_parser/lexer.h"
@@ -224,7 +225,7 @@ void QueryVisitor::RegisterFunctions() {
Function property_defined_function =
Function::Create(DataType::kDocumentIterator, "propertyDefined",
- {Param(DataType::kText)}, std::move(property_defined))
+ {Param(DataType::kString)}, std::move(property_defined))
.ValueOrDie();
registered_functions_.insert(
{property_defined_function.name(), std::move(property_defined_function)});
@@ -301,20 +302,23 @@ libtextclassifier3::StatusOr<PendingValue> QueryVisitor::SearchFunction(
libtextclassifier3::StatusOr<PendingValue>
QueryVisitor::PropertyDefinedFunction(std::vector<PendingValue>&& args) {
- // The first arg is guaranteed to be a TEXT at this point. It should be safe
+ // The first arg is guaranteed to be a STRING at this point. It should be safe
// to call ValueOrDie.
+ const QueryTerm* member = args.at(0).string_val().ValueOrDie();
- // TODO(b/268680462): Consume this and implement the actual iterator.
- // const QueryTerm* member =
- args.at(0).text_val().ValueOrDie();
-
- std::unique_ptr<DocHitInfoIterator> iterator =
+ std::unique_ptr<DocHitInfoIterator> all_docs_iterator =
std::make_unique<DocHitInfoIteratorAllDocumentId>(
document_store_.last_added_document_id());
- features_.insert(kPropertyDefinedInSchemaCustomFunctionFeature);
+ std::set<std::string> target_sections = {std::move(member->term)};
+ std::unique_ptr<DocHitInfoIterator> property_in_schema_iterator =
+ std::make_unique<DocHitInfoIteratorPropertyInSchema>(
+ std::move(all_docs_iterator), &document_store_, &schema_store_,
+ std::move(target_sections));
- return PendingValue(std::move(iterator));
+ features_.insert(kListFilterQueryLanguageFeature);
+
+ return PendingValue(std::move(property_in_schema_iterator));
}
libtextclassifier3::StatusOr<int64_t> QueryVisitor::PopPendingIntValue() {
@@ -362,10 +366,8 @@ QueryVisitor::PopPendingIterator() {
return CreateTermIterator(std::move(string_value));
} else {
ICING_ASSIGN_OR_RETURN(QueryTerm text_value, PopPendingTextValue());
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<Tokenizer::Iterator> token_itr,
- tokenizer_.Tokenize(text_value.term,
- LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> token_itr,
+ tokenizer_.Tokenize(text_value.term));
std::string normalized_term;
std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
// The tokenizer will produce 1+ tokens out of the text. The prefix operator
diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc
index c48d9ad..92eb3e7 100644
--- a/icing/query/advanced_query_parser/query-visitor_test.cc
+++ b/icing/query/advanced_query_parser/query-visitor_test.cc
@@ -792,7 +792,9 @@ TEST_P(QueryVisitorTest, NumericComparatorDoesntAffectLaterTerms) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("type"))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Index three documents:
// - Doc0: ["-2", "-1", "1", "2"] and [-2, -1, 1, 2]
@@ -1543,7 +1545,9 @@ TEST_P(QueryVisitorTest, SingleMinusTerm) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("type"))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
@@ -1595,7 +1599,9 @@ TEST_P(QueryVisitorTest, SingleNotTerm) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("type"))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
@@ -1643,7 +1649,9 @@ TEST_P(QueryVisitorTest, NestedNotTerms) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("type"))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
@@ -1699,7 +1707,9 @@ TEST_P(QueryVisitorTest, DeeplyNestedNotTerms) {
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("type"))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
@@ -1991,7 +2001,9 @@ TEST_P(QueryVisitorTest, AndOrNotPrecedence) {
.SetName("prop1")
.SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build()));
@@ -2073,7 +2085,10 @@ TEST_P(QueryVisitorTest, PropertyFilter) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
@@ -2145,7 +2160,10 @@ TEST_F(QueryVisitorTest, MultiPropertyFilter) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
@@ -2210,7 +2228,9 @@ TEST_P(QueryVisitorTest, PropertyFilterStringIsInvalid) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// "prop1" is a STRING token, which cannot be a property name.
std::string query = CreateQuery(R"(("prop1":foo))");
@@ -2241,7 +2261,9 @@ TEST_P(QueryVisitorTest, PropertyFilterNonNormalized) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
@@ -2308,7 +2330,10 @@ TEST_P(QueryVisitorTest, PropertyFilterWithGrouping) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
@@ -2373,7 +2398,10 @@ TEST_P(QueryVisitorTest, ValidNestedPropertyFilter) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
@@ -2457,7 +2485,10 @@ TEST_P(QueryVisitorTest, InvalidNestedPropertyFilter) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
@@ -2537,7 +2568,10 @@ TEST_P(QueryVisitorTest, NotWithPropertyFilter) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
@@ -2622,7 +2656,10 @@ TEST_P(QueryVisitorTest, PropertyFilterWithNot) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
@@ -2713,7 +2750,10 @@ TEST_P(QueryVisitorTest, SegmentationTest) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
SectionId prop2_section_id = 1;
@@ -2803,7 +2843,9 @@ TEST_P(QueryVisitorTest, PropertyRestrictsPopCorrectly) {
.AddProperty(prop)
.AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
.AddProperty(PropertyConfigBuilder(prop).SetName("prop2")))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
SectionId prop0_id = 0;
SectionId prop1_id = 1;
@@ -2916,7 +2958,9 @@ TEST_P(QueryVisitorTest, UnsatisfiablePropertyRestrictsPopCorrectly) {
.AddProperty(prop)
.AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
.AddProperty(PropertyConfigBuilder(prop).SetName("prop2")))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
SectionId prop0_id = 0;
SectionId prop1_id = 1;
@@ -3139,7 +3183,10 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedFunctionCalls) {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
// Section ids are assigned alphabetically.
SectionId prop1_section_id = 0;
@@ -3265,7 +3312,10 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsNarrowing) {
.AddProperty(PropertyConfigBuilder(prop).SetName("prop5"))
.AddProperty(PropertyConfigBuilder(prop).SetName("prop6"))
.AddProperty(PropertyConfigBuilder(prop).SetName("prop7")))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
// Section ids are assigned alphabetically.
SectionId prop0_id = 0;
SectionId prop1_id = 1;
@@ -3442,7 +3492,10 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsExpanding) {
.AddProperty(PropertyConfigBuilder(prop).SetName("prop5"))
.AddProperty(PropertyConfigBuilder(prop).SetName("prop6"))
.AddProperty(PropertyConfigBuilder(prop).SetName("prop7")))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
// Section ids are assigned alphabetically.
SectionId prop0_id = 0;
SectionId prop1_id = 1;
@@ -3606,7 +3659,7 @@ TEST_F(QueryVisitorTest,
TEST_F(
QueryVisitorTest,
PropertyDefinedFunctionWithMoreThanOneTextArgumentReturnsInvalidArgument) {
- std::string query = "propertyDefined(foo, bar)";
+ std::string query = "propertyDefined(\"foo\", \"bar\")";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -3620,9 +3673,9 @@ TEST_F(
}
TEST_F(QueryVisitorTest,
- PropertyDefinedFunctionWithStringArgumentReturnsInvalidArgument) {
- // The argument type is STRING, not TEXT here.
- std::string query = "propertyDefined(\"foo\")";
+ PropertyDefinedFunctionWithTextArgumentReturnsInvalidArgument) {
+ // The argument type is TEXT, not STRING here.
+ std::string query = "propertyDefined(foo)";
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -3650,7 +3703,7 @@ TEST_F(QueryVisitorTest,
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_P(QueryVisitorTest, PropertyDefinedFunctionCurrentlyReturnsEverything) {
+TEST_P(QueryVisitorTest, PropertyDefinedFunctionReturnsMatchingDocuments) {
// Set up two schemas, one with a "url" field and one without.
ICING_ASSERT_OK(schema_store_->SetSchema(
SchemaBuilder()
@@ -3661,8 +3714,11 @@ TEST_P(QueryVisitorTest, PropertyDefinedFunctionCurrentlyReturnsEverything) {
.SetDataType(TYPE_STRING)
.SetCardinality(CARDINALITY_OPTIONAL)))
.AddType(SchemaTypeConfigBuilder().SetType("typeWithoutUrl"))
- .Build()));
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+ // Document 0 has the term "foo" and its schema has the url property.
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build()));
Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
@@ -3670,6 +3726,7 @@ TEST_P(QueryVisitorTest, PropertyDefinedFunctionCurrentlyReturnsEverything) {
editor.BufferTerm("foo");
editor.IndexAllBufferedTerms();
+ // Document 1 has the term "foo" and its schema DOESN'T have the url property.
ICING_ASSERT_OK(document_store_->Put(DocumentBuilder()
.SetKey("ns", "uri1")
.SetSchema("typeWithoutUrl")
@@ -3679,6 +3736,7 @@ TEST_P(QueryVisitorTest, PropertyDefinedFunctionCurrentlyReturnsEverything) {
editor.BufferTerm("foo");
editor.IndexAllBufferedTerms();
+ // Document 2 has the term "bar" and its schema has the url property.
ICING_ASSERT_OK(document_store_->Put(
DocumentBuilder().SetKey("ns", "uri2").SetSchema("typeWithUrl").Build()));
editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX,
@@ -3686,7 +3744,60 @@ TEST_P(QueryVisitorTest, PropertyDefinedFunctionCurrentlyReturnsEverything) {
editor.BufferTerm("bar");
editor.IndexAllBufferedTerms();
- std::string query = CreateQuery("foo propertyDefined(url)");
+ std::string query = CreateQuery("foo propertyDefined(\"url\")");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
+ UnorderedElementsAre(kDocumentId0));
+}
+
+TEST_P(QueryVisitorTest,
+ PropertyDefinedFunctionReturnsNothingIfNoMatchingProperties) {
+ // Set up two schemas, one with a "url" field and one without.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("typeWithUrl")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("url")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("typeWithoutUrl"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Document 0 has the term "foo" and its schema has the url property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // Document 1 has the term "foo" and its schema DOESN'T have the url property.
+ ICING_ASSERT_OK(document_store_->Put(DocumentBuilder()
+ .SetKey("ns", "uri1")
+ .SetSchema("typeWithoutUrl")
+ .Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // Attempt to query a non-existent property.
+ std::string query = CreateQuery("propertyDefined(\"nonexistentproperty\")");
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
ParseQueryHelper(query));
QueryVisitor query_visitor(
@@ -3697,14 +3808,62 @@ TEST_P(QueryVisitorTest, PropertyDefinedFunctionCurrentlyReturnsEverything) {
root_node->Accept(&query_visitor);
ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
std::move(query_visitor).ConsumeResults());
- EXPECT_THAT(
- query_results.features_in_use,
- UnorderedElementsAre(kPropertyDefinedInSchemaCustomFunctionFeature,
- kListFilterQueryLanguageFeature));
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
+
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty());
+}
+
+TEST_P(QueryVisitorTest,
+ PropertyDefinedFunctionWithNegationMatchesDocsWithNoSuchProperty) {
+ // Set up two schemas, one with a "url" field and one without.
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("typeWithUrl")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("url")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("typeWithoutUrl"))
+ .Build(),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ // Document 0 has the term "foo" and its schema has the url property.
+ ICING_ASSERT_OK(document_store_->Put(
+ DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build()));
+ Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1,
+ TERM_MATCH_PREFIX, /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ // Document 1 has the term "foo" and its schema DOESN'T have the url property.
+ ICING_ASSERT_OK(document_store_->Put(DocumentBuilder()
+ .SetKey("ns", "uri1")
+ .SetSchema("typeWithoutUrl")
+ .Build()));
+ editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX,
+ /*namespace_id=*/0);
+ editor.BufferTerm("foo");
+ editor.IndexAllBufferedTerms();
+
+ std::string query = CreateQuery("foo AND NOT propertyDefined(\"url\")");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node,
+ ParseQueryHelper(query));
+ QueryVisitor query_visitor(
+ index_.get(), numeric_index_.get(), document_store_.get(),
+ schema_store_.get(), normalizer_.get(), tokenizer_.get(), query,
+ DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX,
+ /*needs_term_frequency_info_=*/true);
+ root_node->Accept(&query_visitor);
+ ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results,
+ std::move(query_visitor).ConsumeResults());
+ EXPECT_THAT(query_results.features_in_use,
+ UnorderedElementsAre(kListFilterQueryLanguageFeature));
- // TODO(b/268680462): Update once the feature is actually implemented.
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()),
- UnorderedElementsAre(kDocumentId0, kDocumentId1));
+ UnorderedElementsAre(kDocumentId1));
}
INSTANTIATE_TEST_SUITE_P(QueryVisitorTest, QueryVisitorTest,
diff --git a/icing/query/query-features.h b/icing/query/query-features.h
index 6e4fb94..158e13e 100644
--- a/icing/query/query-features.h
+++ b/icing/query/query-features.h
@@ -44,20 +44,13 @@ constexpr Feature kVerbatimSearchFeature =
// - expanding support for negation and property restriction expressions
// - prefix operator '*'
// - 'NOT' operator
+// - propertyDefined("url")
constexpr Feature kListFilterQueryLanguageFeature =
"LIST_FILTER_QUERY_LANGUAGE"; // Features#LIST_FILTER_QUERY_LANGUAGE
-// This feature enables the custom function hasPropertyDefined(member). For
-// example, a query "hasPropertyDefined(url)" will only return documents whose
-// schemas have defined a "url" property.
-// TODO(b/268680462): Update Features.java to sync with this Feature.
-constexpr Feature kPropertyDefinedInSchemaCustomFunctionFeature =
- "PROPERTY_DEFINED_IN_SCHEMA"; // Features#PROPERTY_DEFINED_IN_SCHEMA
-
inline std::unordered_set<Feature> GetQueryFeaturesSet() {
return {kNumericSearchFeature, kVerbatimSearchFeature,
- kListFilterQueryLanguageFeature,
- kPropertyDefinedInSchemaCustomFunctionFeature};
+ kListFilterQueryLanguageFeature};
}
} // namespace lib
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
index 6760fad..c9704fe 100644
--- a/icing/query/query-processor.cc
+++ b/icing/query/query-processor.cc
@@ -346,7 +346,6 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery(
break;
}
case Token::Type::INVALID:
- ICING_LOG(ERROR) << "INVALID";
[[fallthrough]];
default:
// This wouldn't happen if tokenizer and query processor both work
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
index 6826c22..3596082 100644
--- a/icing/query/query-processor_benchmark.cc
+++ b/icing/query/query-processor_benchmark.cc
@@ -144,7 +144,9 @@ void BM_QueryOneTerm(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem, schema_dir, &clock));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentStore::CreateResult create_result =
CreateDocumentStore(&filesystem, doc_store_dir, &clock,
@@ -270,7 +272,9 @@ void BM_QueryFiveTerms(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem, schema_dir, &clock));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentStore::CreateResult create_result =
CreateDocumentStore(&filesystem, doc_store_dir, &clock,
@@ -414,7 +418,9 @@ void BM_QueryDiacriticTerm(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem, schema_dir, &clock));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentStore::CreateResult create_result =
CreateDocumentStore(&filesystem, doc_store_dir, &clock,
@@ -543,7 +549,9 @@ void BM_QueryHiragana(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem, schema_dir, &clock));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentStore::CreateResult create_result =
CreateDocumentStore(&filesystem, doc_store_dir, &clock,
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index 47245fd..be20b04 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -220,7 +220,10 @@ TEST_P(QueryProcessorTest, EmptyGroupMatchAllDocuments) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
@@ -264,7 +267,10 @@ TEST_P(QueryProcessorTest, EmptyQueryMatchAllDocuments) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
@@ -300,7 +306,10 @@ TEST_P(QueryProcessorTest, QueryTermNormalized) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -357,7 +366,10 @@ TEST_P(QueryProcessorTest, OneTermPrefixMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -409,7 +421,10 @@ TEST_P(QueryProcessorTest, OneTermPrefixMatchWithMaxSectionID) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -463,7 +478,10 @@ TEST_P(QueryProcessorTest, OneTermExactMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -515,7 +533,10 @@ TEST_P(QueryProcessorTest, AndSameTermExactMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -569,7 +590,10 @@ TEST_P(QueryProcessorTest, AndTwoTermExactMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -626,7 +650,10 @@ TEST_P(QueryProcessorTest, AndSameTermPrefixMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -680,7 +707,10 @@ TEST_P(QueryProcessorTest, AndTwoTermPrefixMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -738,7 +768,10 @@ TEST_P(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -796,7 +829,10 @@ TEST_P(QueryProcessorTest, OrTwoTermExactMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -867,7 +903,10 @@ TEST_P(QueryProcessorTest, OrTwoTermPrefixMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -938,7 +977,10 @@ TEST_P(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1007,7 +1049,10 @@ TEST_P(QueryProcessorTest, CombinedAndOrTerms) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1175,7 +1220,10 @@ TEST_P(QueryProcessorTest, OneGroup) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1238,7 +1286,10 @@ TEST_P(QueryProcessorTest, TwoGroups) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1304,7 +1355,10 @@ TEST_P(QueryProcessorTest, ManyLevelNestedGrouping) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1367,7 +1421,10 @@ TEST_P(QueryProcessorTest, OneLevelNestedGrouping) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1432,7 +1489,10 @@ TEST_P(QueryProcessorTest, ExcludeTerm) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that they'll bump the
@@ -1483,7 +1543,10 @@ TEST_P(QueryProcessorTest, ExcludeNonexistentTerm) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that they'll bump the
@@ -1532,7 +1595,10 @@ TEST_P(QueryProcessorTest, ExcludeAnd) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that they'll bump the
@@ -1613,7 +1679,10 @@ TEST_P(QueryProcessorTest, ExcludeOr) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that they'll bump the
@@ -1697,7 +1766,10 @@ TEST_P(QueryProcessorTest, WithoutTermFrequency) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// just inserting the documents so that the DocHitInfoIterators will see
@@ -1793,7 +1865,10 @@ TEST_P(QueryProcessorTest, DeletedFilter) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1854,7 +1929,10 @@ TEST_P(QueryProcessorTest, NamespaceFilter) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1917,7 +1995,10 @@ TEST_P(QueryProcessorTest, SchemaTypeFilter) {
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.AddType(SchemaTypeConfigBuilder().SetType("message"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1981,7 +2062,10 @@ TEST_P(QueryProcessorTest, PropertyFilterForOneDocument) {
.Build();
// First and only indexed property, so it gets a section_id of 0
int subject_section_id = 0;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2050,7 +2134,10 @@ TEST_P(QueryProcessorTest, PropertyFilterAcrossSchemaTypes) {
// alphabetically.
int email_foo_section_id = 1;
int message_foo_section_id = 0;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2120,7 +2207,10 @@ TEST_P(QueryProcessorTest, PropertyFilterWithinSchemaType) {
.Build();
int email_foo_section_id = 0;
int message_foo_section_id = 0;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2208,7 +2298,10 @@ TEST_P(QueryProcessorTest, NestedPropertyFilter) {
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2269,7 +2362,10 @@ TEST_P(QueryProcessorTest, PropertyFilterRespectsDifferentSectionIds) {
.Build();
int email_foo_section_id = 0;
int message_foo_section_id = 0;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2329,7 +2425,10 @@ TEST_P(QueryProcessorTest, NonexistentPropertyFilterReturnsEmptyResults) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2383,7 +2482,10 @@ TEST_P(QueryProcessorTest, UnindexedPropertyFilterReturnsEmptyResults) {
.SetDataType(TYPE_STRING)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2440,7 +2542,10 @@ TEST_P(QueryProcessorTest, PropertyFilterTermAndUnrestrictedTerm) {
.Build();
int email_foo_section_id = 0;
int message_foo_section_id = 0;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2503,7 +2608,10 @@ TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// Arbitrary value, just has to be less than the document's creation
// timestamp + ttl
@@ -2561,7 +2669,10 @@ TEST_P(QueryProcessorTest, DocumentPastTtlFilteredOut) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// Arbitrary value, just has to be greater than the document's creation
// timestamp + ttl
@@ -2634,7 +2745,10 @@ TEST_P(QueryProcessorTest, NumericFilter) {
// SectionIds are assigned alphabetically
SectionId cost_section_id = 0;
SectionId price_section_id = 1;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_one_id,
@@ -2729,7 +2843,10 @@ TEST_P(QueryProcessorTest, NumericFilterWithoutEnablingFeatureFails) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
SectionId price_section_id = 0;
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_one_id,
diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc
index 7d45de7..4937f39 100644
--- a/icing/query/suggestion-processor_test.cc
+++ b/icing/query/suggestion-processor_test.cc
@@ -163,7 +163,10 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_And) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -206,7 +209,10 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_AndNary) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -253,7 +259,10 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_Or) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -302,7 +311,10 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_OrNary) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -364,7 +376,10 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_NormalizedTerm) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -421,7 +436,10 @@ TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -453,7 +471,10 @@ TEST_F(SuggestionProcessorTest, PrefixTrailingSpaceTest) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -485,7 +506,10 @@ TEST_F(SuggestionProcessorTest, NormalizePrefixTest) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -531,7 +555,10 @@ TEST_F(SuggestionProcessorTest, ParenthesesOperatorPrefixTest) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -573,7 +600,10 @@ TEST_F(SuggestionProcessorTest, OtherSpecialPrefixTest) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -629,7 +659,10 @@ TEST_F(SuggestionProcessorTest, InvalidPrefixTest) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
diff --git a/icing/result/projection-tree.cc b/icing/result/projection-tree.cc
index fded576..9896491 100644
--- a/icing/result/projection-tree.cc
+++ b/icing/result/projection-tree.cc
@@ -22,8 +22,9 @@
namespace icing {
namespace lib {
-ProjectionTree::ProjectionTree(const TypePropertyMask& type_field_mask) {
- for (const std::string& field_mask : type_field_mask.paths()) {
+ProjectionTree::ProjectionTree(
+ const SchemaStore::ExpandedTypePropertyMask& type_field_mask) {
+ for (const std::string& field_mask : type_field_mask.paths) {
Node* current_node = &root_;
for (std::string_view sub_field_mask :
property_util::SplitPropertyPathExpr(field_mask)) {
diff --git a/icing/result/projection-tree.h b/icing/result/projection-tree.h
index 5916fe6..cdf268a 100644
--- a/icing/result/projection-tree.h
+++ b/icing/result/projection-tree.h
@@ -19,14 +19,13 @@
#include <vector>
#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
namespace icing {
namespace lib {
class ProjectionTree {
public:
- static constexpr std::string_view kSchemaTypeWildcard = "*";
-
struct Node {
explicit Node(std::string name = "") : name(std::move(name)) {}
@@ -38,7 +37,8 @@ class ProjectionTree {
}
};
- explicit ProjectionTree(const TypePropertyMask& type_field_mask);
+ explicit ProjectionTree(
+ const SchemaStore::ExpandedTypePropertyMask& type_field_mask);
const Node& root() const { return root_; }
diff --git a/icing/result/projection-tree_test.cc b/icing/result/projection-tree_test.cc
index 2b0f966..46d0c12 100644
--- a/icing/result/projection-tree_test.cc
+++ b/icing/result/projection-tree_test.cc
@@ -17,6 +17,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
namespace icing {
namespace lib {
@@ -28,72 +29,87 @@ using ::testing::IsEmpty;
using ::testing::SizeIs;
TEST(ProjectionTreeTest, CreateEmptyFieldMasks) {
- TypePropertyMask type_field_mask;
- ProjectionTree tree(type_field_mask);
+ // A mask with no paths should yield only an unnamed, childless root.
+ ProjectionTree tree({});
EXPECT_THAT(tree.root().name, IsEmpty());
EXPECT_THAT(tree.root().children, IsEmpty());
}
TEST(ProjectionTreeTest, CreateTreeTopLevel) {
- TypePropertyMask type_field_mask;
- type_field_mask.add_paths("subject");
+ // A single un-nested path should produce exactly one leaf under the root.
+ SchemaStore::ExpandedTypePropertyMask type_field_mask{"", {"subject"}};
ProjectionTree tree(type_field_mask);
EXPECT_THAT(tree.root().name, IsEmpty());
ASSERT_THAT(tree.root().children, SizeIs(1));
- ASSERT_THAT(tree.root().children.at(0).name, Eq("subject"));
- ASSERT_THAT(tree.root().children.at(0).children, IsEmpty());
+ EXPECT_THAT(tree.root().children.at(0).name, Eq("subject"));
+ EXPECT_THAT(tree.root().children.at(0).children, IsEmpty());
}
TEST(ProjectionTreeTest, CreateTreeMultipleTopLevel) {
- TypePropertyMask type_field_mask;
- type_field_mask.add_paths("subject");
- type_field_mask.add_paths("body");
+ // Two un-nested paths should produce two leaf children under the root.
+ SchemaStore::ExpandedTypePropertyMask type_field_mask{"",
+ {"subject", "body"}};
ProjectionTree tree(type_field_mask);
EXPECT_THAT(tree.root().name, IsEmpty());
ASSERT_THAT(tree.root().children, SizeIs(2));
- ASSERT_THAT(tree.root().children.at(0).name, Eq("subject"));
- ASSERT_THAT(tree.root().children.at(0).children, IsEmpty());
- ASSERT_THAT(tree.root().children.at(1).name, Eq("body"));
- ASSERT_THAT(tree.root().children.at(1).children, IsEmpty());
+
+ // The order of the root's children is not assumed: normalise it so that
+ // child0 always refers to "subject" before checking the expectations.
+ const ProjectionTree::Node* child0 = &tree.root().children.at(0);
+ const ProjectionTree::Node* child1 = &tree.root().children.at(1);
+ if (child0->name != "subject") {
+ std::swap(child0, child1);
+ }
+
+ EXPECT_THAT(child0->name, Eq("subject"));
+ EXPECT_THAT(child0->children, IsEmpty());
+ EXPECT_THAT(child1->name, Eq("body"));
+ EXPECT_THAT(child1->children, IsEmpty());
}
TEST(ProjectionTreeTest, CreateTreeNested) {
- TypePropertyMask type_field_mask;
- type_field_mask.add_paths("subject.body");
- type_field_mask.add_paths("body");
+ // One nested path ("subject.body") and one top-level path ("body") should
+ // produce two root children, with "subject" owning a single "body" leaf.
+ SchemaStore::ExpandedTypePropertyMask type_field_mask{
+ "", {"subject.body", "body"}};
ProjectionTree tree(type_field_mask);
EXPECT_THAT(tree.root().name, IsEmpty());
ASSERT_THAT(tree.root().children, SizeIs(2));
- ASSERT_THAT(tree.root().children.at(0).name, Eq("subject"));
- ASSERT_THAT(tree.root().children.at(0).children, SizeIs(1));
- ASSERT_THAT(tree.root().children.at(0).children.at(0).name, Eq("body"));
- ASSERT_THAT(tree.root().children.at(0).children.at(0).children, IsEmpty());
- ASSERT_THAT(tree.root().children.at(1).name, Eq("body"));
- ASSERT_THAT(tree.root().children.at(1).children, IsEmpty());
+
+ // The order of the root's children is not assumed, so normalise it. A root
+ // child is named after the first path segment ("subject"), never the full
+ // path ("subject.body"); comparing against the full path would always swap
+ // and make the test depend on the children arriving in one specific order.
+ const ProjectionTree::Node* child0 = &tree.root().children.at(0);
+ const ProjectionTree::Node* child1 = &tree.root().children.at(1);
+ if (child0->name != "subject") {
+ std::swap(child0, child1);
+ }
+
+ EXPECT_THAT(child0->name, Eq("subject"));
+ ASSERT_THAT(child0->children, SizeIs(1));
+ EXPECT_THAT(child0->children.at(0).name, Eq("body"));
+ EXPECT_THAT(child0->children.at(0).children, IsEmpty());
+ EXPECT_THAT(child1->name, Eq("body"));
+ EXPECT_THAT(child1->children, IsEmpty());
}
TEST(ProjectionTreeTest, CreateTreeNestedSharedNode) {
- TypePropertyMask type_field_mask;
- type_field_mask.add_paths("sender.name.first");
- type_field_mask.add_paths("sender.emailAddress");
+ // Both paths share the "sender" prefix, so the tree should contain a single
+ // "sender" node under the root with two children ("name", "emailAddress").
+ SchemaStore::ExpandedTypePropertyMask type_field_mask{
+ "", {"sender.name.first", "sender.emailAddress"}};
ProjectionTree tree(type_field_mask);
EXPECT_THAT(tree.root().name, IsEmpty());
ASSERT_THAT(tree.root().children, SizeIs(1));
- ASSERT_THAT(tree.root().children.at(0).name, Eq("sender"));
+ EXPECT_THAT(tree.root().children.at(0).name, Eq("sender"));
ASSERT_THAT(tree.root().children.at(0).children, SizeIs(2));
- ASSERT_THAT(tree.root().children.at(0).children.at(0).name, Eq("name"));
- ASSERT_THAT(tree.root().children.at(0).children.at(0).children, SizeIs(1));
- ASSERT_THAT(tree.root().children.at(0).children.at(0).children.at(0).name,
- Eq("first"));
- ASSERT_THAT(tree.root().children.at(0).children.at(0).children.at(0).children,
- IsEmpty());
- ASSERT_THAT(tree.root().children.at(0).children.at(1).name,
- Eq("emailAddress"));
- ASSERT_THAT(tree.root().children.at(0).children.at(1).children, IsEmpty());
+
+ // The order of "sender"'s children is not assumed: normalise it so that
+ // child0_child0 always refers to "name" before checking the expectations.
+ const ProjectionTree::Node* child0_child0 =
+ &tree.root().children.at(0).children.at(0);
+ const ProjectionTree::Node* child0_child1 =
+ &tree.root().children.at(0).children.at(1);
+ if (child0_child0->name != "name") {
+ std::swap(child0_child0, child0_child1);
+ }
+
+ EXPECT_THAT(child0_child0->name, Eq("name"));
+ ASSERT_THAT(child0_child0->children, SizeIs(1));
+ EXPECT_THAT(child0_child0->children.at(0).name, Eq("first"));
+ EXPECT_THAT(child0_child0->children.at(0).children, IsEmpty());
+ EXPECT_THAT(child0_child1->name, Eq("emailAddress"));
+ EXPECT_THAT(child0_child1->children, IsEmpty());
}
} // namespace
diff --git a/icing/result/result-adjustment-info.cc b/icing/result/result-adjustment-info.cc
index 763cd10..00ac379 100644
--- a/icing/result/result-adjustment-info.cc
+++ b/icing/result/result-adjustment-info.cc
@@ -22,6 +22,7 @@
#include "icing/proto/term.pb.h"
#include "icing/result/projection-tree.h"
#include "icing/result/snippet-context.h"
+#include "icing/schema/schema-store.h"
namespace icing {
namespace lib {
@@ -46,15 +47,16 @@ SnippetContext CreateSnippetContext(const SearchSpecProto& search_spec,
ResultAdjustmentInfo::ResultAdjustmentInfo(
const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
- const ResultSpecProto& result_spec,
+ const ResultSpecProto& result_spec, const SchemaStore* schema_store,
SectionRestrictQueryTermsMap query_terms)
: snippet_context(CreateSnippetContext(search_spec, result_spec,
std::move(query_terms))),
remaining_num_to_snippet(snippet_context.snippet_spec.num_to_snippet()) {
- for (const TypePropertyMask& type_field_mask :
- result_spec.type_property_masks()) {
+ // Build one ProjectionTree per mask returned by the schema store's
+ // expansion of the result spec's type property masks, keyed by schema type.
+ // NOTE(review): schema_store is dereferenced unconditionally here, so
+ // callers must pass a non-null pointer.
+ for (const SchemaStore::ExpandedTypePropertyMask& type_field_mask :
+ schema_store->ExpandTypePropertyMasks(
+ result_spec.type_property_masks())) {
projection_tree_map.insert(
- {type_field_mask.schema_type(), ProjectionTree(type_field_mask)});
+ {type_field_mask.schema_type, ProjectionTree(type_field_mask)});
}
}
diff --git a/icing/result/result-adjustment-info.h b/icing/result/result-adjustment-info.h
index 98fa7f5..e859492 100644
--- a/icing/result/result-adjustment-info.h
+++ b/icing/result/result-adjustment-info.h
@@ -22,6 +22,7 @@
#include "icing/proto/search.pb.h"
#include "icing/result/projection-tree.h"
#include "icing/result/snippet-context.h"
+#include "icing/schema/schema-store.h"
namespace icing {
namespace lib {
@@ -42,6 +43,7 @@ struct ResultAdjustmentInfo {
explicit ResultAdjustmentInfo(const SearchSpecProto& search_spec,
const ScoringSpecProto& scoring_spec,
const ResultSpecProto& result_spec,
+ const SchemaStore* schema_store,
SectionRestrictQueryTermsMap query_terms);
};
diff --git a/icing/result/result-adjustment-info_test.cc b/icing/result/result-adjustment-info_test.cc
index 1c5aea1..cbce557 100644
--- a/icing/result/result-adjustment-info_test.cc
+++ b/icing/result/result-adjustment-info_test.cc
@@ -16,14 +16,19 @@
#include <string>
#include <unordered_set>
+#include <vector>
#include "gtest/gtest.h"
-#include "icing/portable/equals-proto.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/result/projection-tree.h"
#include "icing/result/snippet-context.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
namespace icing {
namespace lib {
@@ -31,11 +36,44 @@ namespace lib {
namespace {
using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::AnyOf;
using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::Pair;
using ::testing::UnorderedElementsAre;
+// Test fixture that owns a SchemaStore rooted in a per-test temporary
+// directory and pre-populated with the "Email" and "Phone" schema types.
+class ResultAdjustmentInfoTest : public testing::Test {
+ protected:
+ // Creates the directory that the schema store is created in.
+ ResultAdjustmentInfoTest() : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ }
+
+ // Creates the schema store and sets a schema with two property-less types.
+ void SetUp() override {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("Phone"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+ }
+
+ // Removes the test directory and everything under it.
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ FakeClock fake_clock_;
+};
+
SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) {
SearchSpecProto search_spec;
search_spec.set_term_match_type(match_type);
@@ -57,7 +95,8 @@ ResultSpecProto CreateResultSpec(
return result_spec;
}
-TEST(ResultAdjustmentInfoTest, ShouldConstructSnippetContextAccordingToSpecs) {
+TEST_F(ResultAdjustmentInfoTest,
+ ShouldConstructSnippetContextAccordingToSpecs) {
ResultSpecProto result_spec =
CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
result_spec.mutable_snippet_spec()->set_num_to_snippet(5);
@@ -70,7 +109,7 @@ TEST(ResultAdjustmentInfoTest, ShouldConstructSnippetContextAccordingToSpecs) {
ResultAdjustmentInfo result_adjustment_info(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- query_terms_map);
+ schema_store_.get(), query_terms_map);
const SnippetContext snippet_context = result_adjustment_info.snippet_context;
// Snippet context should be derived from the specs above.
@@ -84,7 +123,7 @@ TEST(ResultAdjustmentInfoTest, ShouldConstructSnippetContextAccordingToSpecs) {
EXPECT_THAT(result_adjustment_info.remaining_num_to_snippet, Eq(5));
}
-TEST(ResultAdjustmentInfoTest, NoSnippetingShouldReturnNull) {
+TEST_F(ResultAdjustmentInfoTest, NoSnippetingShouldReturnNull) {
ResultSpecProto result_spec =
CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
// Setting num_to_snippet to 0 so that snippeting info won't be
@@ -99,7 +138,7 @@ TEST(ResultAdjustmentInfoTest, NoSnippetingShouldReturnNull) {
ResultAdjustmentInfo result_adjustment_info(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- query_terms_map);
+ schema_store_.get(), query_terms_map);
EXPECT_THAT(result_adjustment_info.snippet_context.query_terms, IsEmpty());
EXPECT_THAT(
@@ -110,8 +149,8 @@ TEST(ResultAdjustmentInfoTest, NoSnippetingShouldReturnNull) {
EXPECT_THAT(result_adjustment_info.remaining_num_to_snippet, Eq(0));
}
-TEST(ResultAdjustmentInfoTest,
- ShouldConstructProjectionTreeMapAccordingToSpecs) {
+TEST_F(ResultAdjustmentInfoTest,
+ ShouldConstructProjectionTreeMapAccordingToSpecs) {
// Create a ResultSpec with type property mask.
ResultSpecProto result_spec =
CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
@@ -127,20 +166,30 @@ TEST(ResultAdjustmentInfoTest,
TypePropertyMask* wildcard_type_property_mask =
result_spec.add_type_property_masks();
wildcard_type_property_mask->set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
+ std::string(SchemaStore::kSchemaTypeWildcard));
wildcard_type_property_mask->add_paths("wild.card");
ResultAdjustmentInfo result_adjustment_info(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ schema_store_.get(),
/*query_terms=*/{});
+ ProjectionTree email_projection_tree =
+ ProjectionTree({"Email", {"sender.name", "sender.emailAddress"}});
+ ProjectionTree alternative_email_projection_tree =
+ ProjectionTree({"Email", {"sender.emailAddress", "sender.name"}});
+ ProjectionTree phone_projection_tree = ProjectionTree({"Phone", {"caller"}});
+ ProjectionTree wildcard_projection_tree = ProjectionTree(
+ {std::string(SchemaStore::kSchemaTypeWildcard), {"wild.card"}});
+
EXPECT_THAT(result_adjustment_info.projection_tree_map,
UnorderedElementsAre(
- Pair("Email", ProjectionTree(*email_type_property_mask)),
- Pair("Phone", ProjectionTree(*phone_type_property_mask)),
- Pair(std::string(ProjectionTree::kSchemaTypeWildcard),
- ProjectionTree(*wildcard_type_property_mask))));
+ Pair("Email", AnyOf(email_projection_tree,
+ alternative_email_projection_tree)),
+ Pair("Phone", phone_projection_tree),
+ Pair(std::string(SchemaStore::kSchemaTypeWildcard),
+ wildcard_projection_tree)));
}
} // namespace
diff --git a/icing/result/result-retriever-v2.cc b/icing/result/result-retriever-v2.cc
index a617f45..c7a8fcd 100644
--- a/icing/result/result-retriever-v2.cc
+++ b/icing/result/result-retriever-v2.cc
@@ -14,12 +14,16 @@
#include "icing/result/result-retriever-v2.h"
+#include <cstddef>
+#include <cstdint>
#include <memory>
+#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/mutex.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/result/page-result.h"
@@ -29,12 +33,15 @@
#include "icing/result/result-state-v2.h"
#include "icing/result/snippet-context.h"
#include "icing/result/snippet-retriever.h"
+#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-filter-data.h"
#include "icing/store/document-store.h"
#include "icing/store/namespace-id.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -54,7 +61,7 @@ void ApplyProjection(const ResultAdjustmentInfo* adjustment_info,
} else {
auto wildcard_projection_tree_itr =
adjustment_info->projection_tree_map.find(
- std::string(ProjectionTree::kSchemaTypeWildcard));
+ std::string(SchemaStore::kSchemaTypeWildcard));
if (wildcard_projection_tree_itr !=
adjustment_info->projection_tree_map.end()) {
projector::Project(wildcard_projection_tree_itr->second.root().children,
@@ -199,6 +206,11 @@ std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage(
// Retrieve child documents
for (const ScoredDocumentHit& child_scored_document_hit :
next_best_document_hit.child_scored_document_hits()) {
+ if (result.joined_results_size() >=
+ result_state.max_joined_children_per_parent_to_return()) {
+ break;
+ }
+
libtextclassifier3::StatusOr<DocumentProto> child_document_or =
doc_store_.Get(child_scored_document_hit.document_id());
if (!child_document_or.ok()) {
diff --git a/icing/result/result-retriever-v2.h b/icing/result/result-retriever-v2.h
index 48fb88d..0499ae1 100644
--- a/icing/result/result-retriever-v2.h
+++ b/icing/result/result-retriever-v2.h
@@ -15,19 +15,20 @@
#ifndef ICING_RESULT_RETRIEVER_V2_H_
#define ICING_RESULT_RETRIEVER_V2_H_
+#include <cstdint>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/search.pb.h"
#include "icing/result/page-result.h"
#include "icing/result/result-state-v2.h"
#include "icing/result/snippet-retriever.h"
#include "icing/schema/schema-store.h"
#include "icing/scoring/scored-document-hit.h"
#include "icing/store/document-store.h"
-#include "icing/store/namespace-id.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
diff --git a/icing/result/result-retriever-v2_group-result-limiter_test.cc b/icing/result/result-retriever-v2_group-result-limiter_test.cc
index d4aaa38..c9e0587 100644
--- a/icing/result/result-retriever-v2_group-result-limiter_test.cc
+++ b/icing/result/result-retriever-v2_group-result-limiter_test.cc
@@ -83,7 +83,9 @@ class ResultRetrieverV2GroupResultLimiterTest : public testing::Test {
schema.add_types()->set_schema_type("Document");
schema.add_types()->set_schema_type("Message");
schema.add_types()->set_schema_type("Person");
- ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
diff --git a/icing/result/result-retriever-v2_projection_test.cc b/icing/result/result-retriever-v2_projection_test.cc
index 94580d4..377e14c 100644
--- a/icing/result/result-retriever-v2_projection_test.cc
+++ b/icing/result/result-retriever-v2_projection_test.cc
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <limits>
#include <memory>
#include <vector>
@@ -109,8 +110,77 @@ class ResultRetrieverV2ProjectionTest : public testing::Test {
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Artist")
+ .AddParentType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Musician")
+ .AddParentType("Artist")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("WithPhone")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneNumber")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneModel")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("PersonWithPhone")
+ .AddParentType("Person")
+ .AddParentType("WithPhone")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneNumber")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("phoneModel")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build())
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -232,7 +302,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionTopLevelLeadNodeFieldPath) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -330,7 +400,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionNestedLeafNodeFieldPath) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -439,7 +509,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionIntermediateNodeFieldPath) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -552,7 +622,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleNestedFieldPaths) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -648,7 +718,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionEmptyFieldPath) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -727,7 +797,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionInvalidFieldPath) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -807,7 +877,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionValidAndInvalidFieldPath) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -889,7 +959,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesNoWildcards) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -963,7 +1033,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesWildcard) {
TypePropertyMask* wildcard_type_property_mask =
result_spec.add_type_property_masks();
wildcard_type_property_mask->set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
+ std::string(SchemaStore::kSchemaTypeWildcard));
wildcard_type_property_mask->add_paths("name");
// 4. Create ResultState with custom ResultSpec.
@@ -975,7 +1045,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesWildcard) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1053,7 +1123,7 @@ TEST_F(ResultRetrieverV2ProjectionTest,
TypePropertyMask* wildcard_type_property_mask =
result_spec.add_type_property_masks();
wildcard_type_property_mask->set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
+ std::string(SchemaStore::kSchemaTypeWildcard));
wildcard_type_property_mask->add_paths("name");
// 4. Create ResultState with custom ResultSpec.
@@ -1065,7 +1135,7 @@ TEST_F(ResultRetrieverV2ProjectionTest,
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1152,7 +1222,7 @@ TEST_F(ResultRetrieverV2ProjectionTest,
TypePropertyMask* wildcard_type_property_mask =
result_spec.add_type_property_masks();
wildcard_type_property_mask->set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
+ std::string(SchemaStore::kSchemaTypeWildcard));
wildcard_type_property_mask->add_paths("name");
// 4. Create ResultState with custom ResultSpec.
@@ -1164,7 +1234,7 @@ TEST_F(ResultRetrieverV2ProjectionTest,
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1255,7 +1325,7 @@ TEST_F(ResultRetrieverV2ProjectionTest,
TypePropertyMask* wildcard_type_property_mask =
result_spec.add_type_property_masks();
wildcard_type_property_mask->set_schema_type(
- std::string(ProjectionTree::kSchemaTypeWildcard));
+ std::string(SchemaStore::kSchemaTypeWildcard));
wildcard_type_property_mask->add_paths("sender");
// 4. Create ResultState with custom ResultSpec.
@@ -1267,7 +1337,7 @@ TEST_F(ResultRetrieverV2ProjectionTest,
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1369,6 +1439,8 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionJoinDocuments) {
// 4. Create parent ResultSpec with type property mask.
ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/2);
+ parent_result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int>::max());
TypePropertyMask* type_property_mask =
parent_result_spec.add_type_property_masks();
type_property_mask->set_schema_type("Person");
@@ -1389,12 +1461,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionJoinDocuments) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), child_result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
parent_result_spec, *document_store_);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1446,6 +1518,384 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionJoinDocuments) {
EqualsProto(projected_email_document2));
}
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionPolymorphism) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, kSectionIdMaskAll, /*score=*/0},
+ {document_id2, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ // Since Artist is a child type of Person, the TypePropertyMask for Person
+ // also applies to Artist.
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Person and Artist results only contain the
+ // 'name' property.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionTransitivePolymorphism) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Musician")
+ .AddStringProperty("name", "Joe Musician")
+ .AddStringProperty("emailAddress", "Musician@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, kSectionIdMaskAll, /*score=*/0},
+ {document_id2, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ // Since Musician is a transitive child type of Person, the TypePropertyMask
+ // for Person also applies to Musician.
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Person and Musician results only contain the
+ // 'name' property.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Musician")
+ .AddStringProperty("name", "Joe Musician")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest,
+ ProjectionPolymorphismChildMissingProperty) {
+ // 1. Add an artist document with missing 'emailAddress', which is allowed
+ // since 'emailAddress' in the parent type 'Person' is defined as optional.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask for the missing property
+ // 'emailAddress' in the Person type. Since Artist is a child type of Person,
+ // the TypePropertyMask for Person also applies to Artist.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("emailAddress");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Artist document does not contain any property,
+ // since 'emailAddress' is missing.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(1));
+ DocumentProto projected_document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionPolymorphismMerge) {
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, kSectionIdMaskAll, /*score=*/0},
+ {document_id2, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+ // Since Artist is a child type of Person, the TypePropertyMask for Person
+ // is merged into Artist's TypePropertyMask by polymorphism, so that 'name'
+ // also shows up in Artist's projection results.
+ TypePropertyMask* artist_type_property_mask =
+ result_spec.add_type_property_masks();
+ artist_type_property_mask->set_schema_type("Artist");
+ artist_type_property_mask->add_paths("emailAddress");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned Person results only contain the 'name'
+ // property and the returned Artist results contain both the 'name' and
+ // 'emailAddress' properties.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Artist")
+ .AddStringProperty("name", "Joe Artist")
+ .AddStringProperty("emailAddress", "artist@aol.com")
+ .Build();
+ EXPECT_THAT(page_result.results.at(1).document(),
+ EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleParentPolymorphism) {
+ // 1. Add a document
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("PersonWithPhone")
+ .AddStringProperty("name", "name")
+ .AddStringProperty("emailAddress", "email")
+ .AddStringProperty("phoneNumber", "12345")
+ .AddStringProperty("phoneModel", "pixel")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document));
+
+ // 2. Setup the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id, kSectionIdMaskAll, /*score=*/0}};
+
+ // 3. Create a ResultSpec with type property mask.
+ ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/1);
+ // Since PersonWithPhone is a child type of Person, the TypePropertyMask
+ // also applies to PersonWithPhone.
+ TypePropertyMask* person_type_property_mask =
+ result_spec.add_type_property_masks();
+ person_type_property_mask->set_schema_type("Person");
+ person_type_property_mask->add_paths("name");
+ // Since PersonWithPhone is a child type of WithPhone, the
+ // TypePropertyMask also applies to PersonWithPhone.
+ TypePropertyMask* with_phone_type_property_mask =
+ result_spec.add_type_property_masks();
+ with_phone_type_property_mask->set_schema_type("WithPhone");
+ with_phone_type_property_mask->add_paths("phoneNumber");
+
+ // 4. Create ResultState with custom ResultSpec.
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
+ std::move(scored_document_hits), /*is_descending=*/false),
+ /*parent_adjustment_info=*/
+ std::make_unique<ResultAdjustmentInfo>(
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
+ /*child_adjustment_info=*/nullptr, result_spec, *document_store_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 5. Verify that the returned document only contains the 'name' and
+ // 'phoneNumber' properties.
+ PageResult page_result =
+ result_retriever->RetrieveNextPage(result_state).first;
+ ASSERT_THAT(page_result.results, SizeIs(1));
+
+ DocumentProto projected_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("PersonWithPhone")
+ .AddStringProperty("name", "name")
+ .AddStringProperty("phoneNumber", "12345")
+ .Build();
+ EXPECT_THAT(page_result.results.at(0).document(),
+ EqualsProto(projected_document));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/result/result-retriever-v2_snippet_test.cc b/icing/result/result-retriever-v2_snippet_test.cc
index 3dce0ef..b2ba8f7 100644
--- a/icing/result/result-retriever-v2_snippet_test.cc
+++ b/icing/result/result-retriever-v2_snippet_test.cc
@@ -102,7 +102,10 @@ class ResultRetrieverV2SnippetTest : public testing::Test {
.SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -236,7 +239,7 @@ TEST_F(ResultRetrieverV2SnippetTest,
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/true), result_spec,
- SectionRestrictQueryTermsMap()),
+ schema_store_.get(), SectionRestrictQueryTermsMap()),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
PageResult page_result =
result_retriever->RetrieveNextPage(result_state).first;
@@ -285,6 +288,7 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
@@ -393,6 +397,7 @@ TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
@@ -468,6 +473,7 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllResults) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
@@ -520,6 +526,7 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeResults) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
{
@@ -575,6 +582,7 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldNotSnippetAnyResults) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
{
@@ -632,6 +640,7 @@ TEST_F(ResultRetrieverV2SnippetTest,
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), result_spec,
+ schema_store_.get(),
SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
/*child_adjustment_info=*/nullptr, result_spec, *document_store_);
@@ -718,6 +727,8 @@ TEST_F(ResultRetrieverV2SnippetTest, JoinSnippeted) {
// Create parent ResultSpec with custom snippet spec.
ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3);
+ parent_result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
*parent_result_spec.mutable_snippet_spec() = CreateSnippetSpec();
// Create child ResultSpec with custom snippet spec.
@@ -735,11 +746,13 @@ TEST_F(ResultRetrieverV2SnippetTest, JoinSnippeted) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec,
+ schema_store_.get(),
SectionRestrictQueryTermsMap({{"", {"person"}}})),
/*child_adjustment_info=*/
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), child_result_spec,
+ schema_store_.get(),
SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
parent_result_spec, *document_store_);
@@ -939,6 +952,8 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllJoinedResults) {
ResultSpecProto::SnippetSpecProto parent_snippet_spec = CreateSnippetSpec();
parent_snippet_spec.set_num_to_snippet(1);
ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3);
+ parent_result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
*parent_result_spec.mutable_snippet_spec() = std::move(parent_snippet_spec);
// Create child ResultSpec with custom snippet spec.
@@ -957,11 +972,13 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllJoinedResults) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec,
+ schema_store_.get(),
SectionRestrictQueryTermsMap({{"", {"person"}}})),
/*child_adjustment_info=*/
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), child_result_spec,
+ schema_store_.get(),
SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
parent_result_spec, *document_store_);
@@ -1051,6 +1068,8 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeJoinedResults) {
ResultSpecProto::SnippetSpecProto parent_snippet_spec = CreateSnippetSpec();
parent_snippet_spec.set_num_to_snippet(3);
ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3);
+ parent_result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
*parent_result_spec.mutable_snippet_spec() = std::move(parent_snippet_spec);
// Create child ResultSpec with custom snippet spec.
@@ -1069,11 +1088,13 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeJoinedResults) {
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec,
+ schema_store_.get(),
SectionRestrictQueryTermsMap({{"", {"person"}}})),
/*child_adjustment_info=*/
std::make_unique<ResultAdjustmentInfo>(
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/false), child_result_spec,
+ schema_store_.get(),
SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})),
parent_result_spec, *document_store_);
diff --git a/icing/result/result-retriever-v2_test.cc b/icing/result/result-retriever-v2_test.cc
index 462d535..411562b 100644
--- a/icing/result/result-retriever-v2_test.cc
+++ b/icing/result/result-retriever-v2_test.cc
@@ -15,17 +15,27 @@
#include "icing/result/result-retriever-v2.h"
#include <atomic>
+#include <cstddef>
+#include <cstdint>
#include <memory>
+#include <string>
#include <unordered_map>
+#include <utility>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/absl_ports/mutex.h"
#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
#include "icing/file/mock-filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/portable/equals-proto.h"
#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/result/page-result.h"
@@ -35,15 +45,19 @@
#include "icing/schema/section.h"
#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
#include "unicode/uloc.h"
namespace icing {
@@ -134,7 +148,10 @@ class ResultRetrieverV2Test : public ::testing::Test {
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
num_total_hits_ = 0;
}
@@ -394,6 +411,159 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) {
ElementsAre(EqualsProto(result1), EqualsProto(result2)));
}
+// Checks that RetrieveNextPage attaches at most
+// max_joined_children_per_parent_to_return joined children to each parent:
+// person2 is joined with three emails, but with the limit set to 2 only
+// email4 and email3 are expected in its result.
+TEST_F(ResultRetrieverV2Test,
+ ShouldLimitNumChildDocumentsByMaxJoinedChildPerParent) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add 2 Person documents
+ DocumentProto person_document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "Person/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id1,
+ doc_store->Put(person_document1));
+
+ DocumentProto person_document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "Person/2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id2,
+ doc_store->Put(person_document2));
+
+ // 2. Add 4 Email documents
+ DocumentProto email_document1 = DocumentBuilder()
+ .SetKey("namespace", "Email/1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Test 1")
+ .AddStringProperty("body", "Test 1")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id1,
+ doc_store->Put(email_document1));
+
+ DocumentProto email_document2 = DocumentBuilder()
+ .SetKey("namespace", "Email/2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Test 2")
+ .AddStringProperty("body", "Test 2")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id2,
+ doc_store->Put(email_document2));
+
+ DocumentProto email_document3 = DocumentBuilder()
+ .SetKey("namespace", "Email/3")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Test 3")
+ .AddStringProperty("body", "Test 3")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id3,
+ doc_store->Put(email_document3));
+
+ DocumentProto email_document4 = DocumentBuilder()
+ .SetKey("namespace", "Email/4")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Test 4")
+ .AddStringProperty("body", "Test 4")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id4,
+ doc_store->Put(email_document4));
+
+ // 3. Setup the joined scored results.
+ std::vector<SectionId> person_hit_section_ids = {
+ GetSectionId("Person", "name")};
+ std::vector<SectionId> email_hit_section_ids = {
+ GetSectionId("Email", "name"), GetSectionId("Email", "body")};
+ SectionIdMask person_hit_section_id_mask =
+ CreateSectionIdMask(person_hit_section_ids);
+ SectionIdMask email_hit_section_id_mask =
+ CreateSectionIdMask(email_hit_section_ids);
+
+ ScoredDocumentHit person1_scored_doc_hit(
+ person_document_id1, person_hit_section_id_mask, /*score=*/1);
+ ScoredDocumentHit person2_scored_doc_hit(
+ person_document_id2, person_hit_section_id_mask, /*score=*/2);
+ ScoredDocumentHit email1_scored_doc_hit(
+ email_document_id1, email_hit_section_id_mask, /*score=*/3);
+ ScoredDocumentHit email2_scored_doc_hit(
+ email_document_id2, email_hit_section_id_mask, /*score=*/4);
+ ScoredDocumentHit email3_scored_doc_hit(
+ email_document_id3, email_hit_section_id_mask, /*score=*/5);
+ ScoredDocumentHit email4_scored_doc_hit(
+ email_document_id4, email_hit_section_id_mask, /*score=*/6);
+ // Create JoinedScoredDocumentHits mapping:
+ // - Person1 to Email1
+ // - Person2 to Email2, Email3, Email4
+ std::vector<JoinedScoredDocumentHit> joined_scored_document_hits = {
+ JoinedScoredDocumentHit(
+ /*final_score=*/1,
+ /*parent_scored_document_hit=*/person1_scored_doc_hit,
+ /*child_scored_document_hits=*/{email1_scored_doc_hit}),
+ JoinedScoredDocumentHit(
+ /*final_score=*/3,
+ /*parent_scored_document_hit=*/person2_scored_doc_hit,
+ /*child_scored_document_hits=*/
+ {email4_scored_doc_hit, email3_scored_doc_hit,
+ email2_scored_doc_hit})};
+
+ // 4. Retrieve result with max_joined_children_per_parent_to_return = 2.
+ ResultSpecProto result_spec =
+ CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
+ result_spec.set_max_joined_children_per_parent_to_return(2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetrieverV2> result_retriever,
+ ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+ ResultStateV2 result_state(
+ std::make_unique<
+ PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
+ std::move(joined_scored_document_hits), /*is_descending=*/true),
+ /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr,
+ result_spec, *doc_store);
+
+ // The ranker is descending, so the hit with final_score 3 (person2) is
+ // expected ahead of the hit with final_score 1 (person1).
+ // Result1: person2 with child docs = [email4, email3]
+ SearchResultProto::ResultProto result1;
+ *result1.mutable_document() = person_document2;
+ result1.set_score(3);
+ SearchResultProto::ResultProto* child1 = result1.add_joined_results();
+ *child1->mutable_document() = email_document4;
+ child1->set_score(6);
+ SearchResultProto::ResultProto* child2 = result1.add_joined_results();
+ *child2->mutable_document() = email_document3;
+ child2->set_score(5);
+
+ // Result2: person1 with child docs = [email1]
+ SearchResultProto::ResultProto result2;
+ *result2.mutable_document() = person_document1;
+ result2.set_score(1);
+ SearchResultProto::ResultProto* child3 = result2.add_joined_results();
+ *child3->mutable_document() = email_document1;
+ child3->set_score(3);
+
+ auto [page_result, has_more_results] =
+ result_retriever->RetrieveNextPage(result_state);
+ EXPECT_THAT(page_result.results,
+ ElementsAre(EqualsProto(result1), EqualsProto(result2)));
+ // No more results.
+ EXPECT_FALSE(has_more_results);
+}
+
TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) {
MockFilesystem mock_filesystem;
EXPECT_CALL(mock_filesystem,
diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc
index 44bfe2d..ce4589b 100644
--- a/icing/result/result-state-manager_test.cc
+++ b/icing/result/result-state-manager_test.cc
@@ -98,7 +98,9 @@ class ResultStateManagerTest : public testing::Test {
SchemaStore::Create(&filesystem_, test_dir_, clock_.get()));
SchemaProto schema;
schema.add_types()->set_schema_type("Document");
- ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/10000));
@@ -162,6 +164,9 @@ class ResultStateManagerTest : public testing::Test {
DocumentStore& document_store() { return *document_store_; }
const DocumentStore& document_store() const { return *document_store_; }
+ // Mutable and const access to the SchemaStore held by this fixture.
+ SchemaStore& schema_store() { return *schema_store_; }
+ const SchemaStore& schema_store() const { return *schema_store_; }
+
const ResultRetrieverV2& result_retriever() const {
return *result_retriever_;
}
@@ -436,7 +441,8 @@ TEST_F(ResultStateManagerTest,
std::move(scored_document_hits1), /*is_descending=*/true),
/*parent_adjustment_info=*/
std::make_unique<ResultAdjustmentInfo>(search_spec, scoring_spec,
- result_spec, query_terms),
+ result_spec, &schema_store(),
+ query_terms),
/*child_adjustment_info=*/nullptr, result_spec, document_store(),
result_retriever()));
ASSERT_THAT(page_result_info1.first, Not(Eq(kInvalidNextPageToken)));
@@ -451,7 +457,8 @@ TEST_F(ResultStateManagerTest,
std::move(scored_document_hits2), /*is_descending=*/true),
/*parent_adjustment_info=*/
std::make_unique<ResultAdjustmentInfo>(search_spec, scoring_spec,
- result_spec, query_terms),
+ result_spec, &schema_store(),
+ query_terms),
/*child_adjustment_info=*/nullptr, result_spec, document_store(),
result_retriever()));
diff --git a/icing/result/result-state-manager_thread-safety_test.cc b/icing/result/result-state-manager_thread-safety_test.cc
index 670578f..06eaaf4 100644
--- a/icing/result/result-state-manager_thread-safety_test.cc
+++ b/icing/result/result-state-manager_thread-safety_test.cc
@@ -91,7 +91,9 @@ class ResultStateManagerThreadSafetyTest : public testing::Test {
SchemaStore::Create(&filesystem_, test_dir_, clock_.get()));
SchemaProto schema;
schema.add_types()->set_schema_type("Document");
- ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/10000));
diff --git a/icing/result/result-state-v2.cc b/icing/result/result-state-v2.cc
index 9459910..3aa9359 100644
--- a/icing/result/result-state-v2.cc
+++ b/icing/result/result-state-v2.cc
@@ -40,6 +40,8 @@ ResultStateV2::ResultStateV2(
num_per_page_(result_spec.num_per_page()),
num_total_bytes_per_page_threshold_(
result_spec.num_total_bytes_per_page_threshold()),
+ max_joined_children_per_parent_to_return_(
+ result_spec.max_joined_children_per_parent_to_return()),
num_total_hits_(nullptr),
result_group_type_(result_spec.result_group_type()) {
for (const ResultSpecProto::ResultGrouping& result_grouping :
diff --git a/icing/result/result-state-v2.h b/icing/result/result-state-v2.h
index b01bee9..919710e 100644
--- a/icing/result/result-state-v2.h
+++ b/icing/result/result-state-v2.h
@@ -22,6 +22,7 @@
#include <vector>
#include "icing/absl_ports/mutex.h"
+#include "icing/absl_ports/thread_annotations.h"
#include "icing/proto/search.pb.h"
#include "icing/result/result-adjustment-info.h"
#include "icing/scoring/scored-document-hits-ranker.h"
@@ -86,7 +87,7 @@ class ResultStateV2 {
return entry_id_group_id_map_;
}
- int num_per_page() const ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ int32_t num_per_page() const ICING_SHARED_LOCKS_REQUIRED(mutex) {
return num_per_page_;
}
@@ -95,6 +96,11 @@ class ResultStateV2 {
return num_total_bytes_per_page_threshold_;
}
+ // Returns the stored limit on how many joined child documents are attached
+ // to each parent document in a result.
+ int32_t max_joined_children_per_parent_to_return() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex) {
+ return max_joined_children_per_parent_to_return_;
+ }
+
ResultSpecProto::ResultGroupingType result_group_type()
ICING_SHARED_LOCKS_REQUIRED(mutex) {
return result_group_type_;
@@ -139,7 +145,7 @@ class ResultStateV2 {
ICING_GUARDED_BY(mutex);
// Number of results to return in each page.
- int num_per_page_ ICING_GUARDED_BY(mutex);
+ int32_t num_per_page_ ICING_GUARDED_BY(mutex);
// The threshold of total bytes of all documents to cutoff, in order to limit
// # of bytes in a single page.
@@ -149,6 +155,10 @@ class ResultStateV2 {
// threshold too much.
int32_t num_total_bytes_per_page_threshold_ ICING_GUARDED_BY(mutex);
+ // Max # of joined child documents to be attached in the result for each
+ // parent document.
+ int32_t max_joined_children_per_parent_to_return_ ICING_GUARDED_BY(mutex);
+
// Pointer to a global counter to sum up the size of scored_document_hits in
// all ResultStates.
// Does not own.
diff --git a/icing/result/result-state-v2_test.cc b/icing/result/result-state-v2_test.cc
index 35b6401..ab29d6e 100644
--- a/icing/result/result-state-v2_test.cc
+++ b/icing/result/result-state-v2_test.cc
@@ -16,19 +16,26 @@
#include <atomic>
#include <cstdint>
+#include <limits>
#include <memory>
#include <string>
+#include <utility>
#include <vector>
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/mutex.h"
#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
@@ -61,7 +68,9 @@ class ResultStateV2Test : public ::testing::Test {
SchemaStore::Create(&filesystem_, schema_store_base_dir_, &clock_));
SchemaProto schema;
schema.add_types()->set_schema_type("Document");
- ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
doc_store_base_dir_ = GetTestTempDir() + "/document_store";
filesystem_.CreateDirectoryRecursively(doc_store_base_dir_.c_str());
@@ -113,6 +122,7 @@ TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToSpecs) {
ResultSpecProto result_spec =
CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE);
result_spec.set_num_total_bytes_per_page_threshold(4096);
+ result_spec.set_max_joined_children_per_parent_to_return(2048);
// Adjustment info is not important in this test.
ResultStateV2 result_state(
@@ -128,6 +138,8 @@ TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToSpecs) {
EXPECT_THAT(result_state.num_per_page(), Eq(result_spec.num_per_page()));
EXPECT_THAT(result_state.num_total_bytes_per_page_threshold(),
Eq(result_spec.num_total_bytes_per_page_threshold()));
+ EXPECT_THAT(result_state.max_joined_children_per_parent_to_return(),
+ Eq(result_spec.max_joined_children_per_parent_to_return()));
}
TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToDefaultSpecs) {
@@ -152,6 +164,9 @@ TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToDefaultSpecs) {
Eq(default_result_spec.num_per_page()));
EXPECT_THAT(result_state.num_total_bytes_per_page_threshold(),
Eq(default_result_spec.num_total_bytes_per_page_threshold()));
+ EXPECT_THAT(
+ result_state.max_joined_children_per_parent_to_return(),
+ Eq(default_result_spec.max_joined_children_per_parent_to_return()));
}
TEST_F(ResultStateV2Test,
diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc
index 2c4023c..fcaba4c 100644
--- a/icing/result/snippet-retriever.cc
+++ b/icing/result/snippet-retriever.cc
@@ -488,9 +488,7 @@ void GetEntriesFromProperty(const PropertyProto* current_property,
current_property->string_values_size(), /*index=*/i, property_path));
std::string_view value = current_property->string_values(i);
std::unique_ptr<Tokenizer::Iterator> iterator =
- tokenizer
- ->Tokenize(value, LanguageSegmenter::AccessType::kForwardIterator)
- .ValueOrDie();
+ tokenizer->Tokenize(value).ValueOrDie();
// All iterators are moved through positions sequentially. Constructing them
// each time resets them to the beginning of the string. This means that,
// for t tokens and in a string of n chars, each MoveToUtf8 call from the
diff --git a/icing/result/snippet-retriever_benchmark.cc b/icing/result/snippet-retriever_benchmark.cc
index 9af8efa..e574325 100644
--- a/icing/result/snippet-retriever_benchmark.cc
+++ b/icing/result/snippet-retriever_benchmark.cc
@@ -104,7 +104,9 @@ void BM_SnippetOneProperty(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem, schema_dir, &clock));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
auto snippet_retriever =
SnippetRetriever::Create(schema_store.get(), language_segmenter.get(),
@@ -231,7 +233,9 @@ void BM_SnippetRfcOneProperty(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem, schema_dir, &clock));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
auto snippet_retriever =
SnippetRetriever::Create(schema_store.get(), language_segmenter.get(),
diff --git a/icing/result/snippet-retriever_test.cc b/icing/result/snippet-retriever_test.cc
index 80d00d5..8d81b43 100644
--- a/icing/result/snippet-retriever_test.cc
+++ b/icing/result/snippet-retriever_test.cc
@@ -113,7 +113,9 @@ class SnippetRetrieverTest : public testing::Test {
TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/10000));
@@ -1021,7 +1023,8 @@ TEST_F(SnippetRetrieverTest, SnippetingTestOneLevel) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
@@ -1111,7 +1114,8 @@ TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevel) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
@@ -1217,7 +1221,8 @@ TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelRepeated) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
@@ -1331,7 +1336,8 @@ TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelSingleValue) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
@@ -1604,7 +1610,8 @@ TEST_F(SnippetRetrieverTest, SnippettingVerbatimAscii) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
@@ -1657,7 +1664,8 @@ TEST_F(SnippetRetrieverTest, SnippettingVerbatimCJK) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
@@ -1715,7 +1723,8 @@ TEST_F(SnippetRetrieverTest, SnippettingRfc822Ascii) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
@@ -1790,7 +1799,8 @@ TEST_F(SnippetRetrieverTest, SnippettingRfc822CJK) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
snippet_retriever_,
@@ -1835,13 +1845,11 @@ TEST_F(SnippetRetrieverTest, SnippettingRfc822CJK) {
TEST_F(SnippetRetrieverTest, SnippettingUrlAscii) {
SchemaProto schema =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder()
- .SetType("urlType")
- .AddProperty(PropertyConfigBuilder()
- .SetName("url")
- .SetDataTypeString(MATCH_PREFIX,
- TOKENIZER_URL)
- .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder().SetType("urlType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("url")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_URL)
+ .SetCardinality(CARDINALITY_REPEATED)))
.Build();
ICING_ASSERT_OK(schema_store_->SetSchema(
schema, /*ignore_errors_and_delete_documents=*/true));
diff --git a/icing/schema-builder.h b/icing/schema-builder.h
index 1dceb62..9e384c5 100644
--- a/icing/schema-builder.h
+++ b/icing/schema-builder.h
@@ -158,8 +158,8 @@ class SchemaTypeConfigBuilder {
return *this;
}
- SchemaTypeConfigBuilder& SetParentType(std::string_view parent_type) {
- type_config_.set_parent_type(std::string(parent_type));
+ SchemaTypeConfigBuilder& AddParentType(std::string_view parent_type) {
+ type_config_.add_parent_types(std::string(parent_type));
return *this;
}
diff --git a/icing/schema/backup-schema-producer.cc b/icing/schema/backup-schema-producer.cc
new file mode 100644
index 0000000..d0a0554
--- /dev/null
+++ b/icing/schema/backup-schema-producer.cc
@@ -0,0 +1,164 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/backup-schema-producer.h"
+
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/property-util.h"
+#include "icing/schema/section.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Builds a lookup from each top-level property name to the number of entries
+// in metadata_list whose path starts at that property.
+// A path with no property-path separator adds one to the property it names; a
+// nested path adds one to its first segment, so a document property ends up
+// with the number of indexed sections nested beneath it.
+// The keys are views into the path strings owned by metadata_list and are
+// only valid while those strings are alive.
+std::unordered_map<std::string_view, int> CreateIndexedIdCountMap(
+    const std::vector<SectionMetadata>* metadata_list) {
+  std::unordered_map<std::string_view, int> id_count_by_property;
+  for (const SectionMetadata& section : *metadata_list) {
+    // substr clamps its length to the end of the view, so an npos result
+    // from find (no separator present) selects the whole path.
+    const std::string_view root_property =
+        std::string_view(section.path)
+            .substr(0,
+                    section.path.find(property_util::kPropertyPathSeparator));
+    ++id_count_by_property[root_property];
+  }
+  return id_count_by_property;
+}
+
+// Collects the positions (within schema.types()) of every type that older
+// code cannot handle unmodified, i.e. that is rollback incompatible.
+//
+// A type is reported when either:
+//   1. any of its properties uses RFC822 tokenization, or
+//   2. its section metadata list holds more than kOldTotalNumSections entries.
+//
+// The metadata list is only looked up for types that did not already match
+// rule 1; an error from that lookup is returned to the caller.
+libtextclassifier3::StatusOr<std::vector<int>>
+GetRollbackIncompatibleTypeIndices(const SchemaProto& schema,
+                                   const SectionManager& type_manager) {
+  std::vector<int> incompatible_indices;
+  for (int type_index = 0; type_index < schema.types_size(); ++type_index) {
+    const SchemaTypeConfigProto& type_config = schema.types(type_index);
+
+    // Rule 1: stop scanning at the first RFC822-tokenized property.
+    bool uses_rfc822 = false;
+    for (const PropertyConfigProto& property_config :
+         type_config.properties()) {
+      uses_rfc822 =
+          property_config.string_indexing_config().tokenizer_type() ==
+          StringIndexingConfig::TokenizerType::RFC822;
+      if (uses_rfc822) {
+        break;
+      }
+    }
+    if (uses_rfc822) {
+      incompatible_indices.push_back(type_index);
+      continue;
+    }
+
+    // Rule 2: too many indexed sections for the old limit.
+    ICING_ASSIGN_OR_RETURN(
+        const std::vector<SectionMetadata>* section_metadata,
+        type_manager.GetMetadataList(type_config.schema_type()));
+    if (section_metadata->size() > kOldTotalNumSections) {
+      incompatible_indices.push_back(type_index);
+    }
+  }
+  return incompatible_indices;
+}
+
+} // namespace
+
+// Builds a BackupSchemaProducer for `schema`.
+//
+// When no type is rollback incompatible, a default-constructed producer is
+// returned and `schema` is not copied. Otherwise `schema` is copied and each
+// incompatible type in the copy is adjusted:
+//   1. every property using the RFC822 tokenizer has its string indexing
+//      config cleared, and
+//   2. if the type still has more than kOldTotalNumSections indexed sections,
+//      indexing configs are cleared from the last property towards the first
+//      until the count is within that limit.
+//
+// Errors from GetRollbackIncompatibleTypeIndices or
+// type_manager.GetMetadataList are returned to the caller.
+/* static */ libtextclassifier3::StatusOr<BackupSchemaProducer>
+BackupSchemaProducer::Create(const SchemaProto& schema,
+                             const SectionManager& type_manager) {
+  ICING_ASSIGN_OR_RETURN(
+      std::vector<int> invalid_type_indices,
+      GetRollbackIncompatibleTypeIndices(schema, type_manager));
+  if (invalid_type_indices.empty()) {
+    return BackupSchemaProducer();
+  }
+
+  SchemaProto backup_schema(schema);
+  for (int i : invalid_type_indices) {
+    SchemaTypeConfigProto* type = backup_schema.mutable_types(i);
+
+    // This should never cause an error - every type should have an entry in
+    // the type_manager.
+    ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
+                           type_manager.GetMetadataList(type->schema_type()));
+    int num_indexed_sections = metadata_list->size();
+    // The per-property counts are only needed (and only built) when the type
+    // is over the old section limit; otherwise the map stays empty.
+    std::unordered_map<std::string_view, int> property_indexed_id_count_map;
+    if (num_indexed_sections > kOldTotalNumSections) {
+      property_indexed_id_count_map = CreateIndexedIdCountMap(metadata_list);
+    }
+
+    // Step 1. Switch all properties with RFC tokenizer as unindexed.
+    for (PropertyConfigProto& property : *type->mutable_properties()) {
+      // If the property uses the RFC tokenizer, then we need to set it to NONE
+      // and set match type UNKNOWN. Clearing the whole string indexing config
+      // resets both fields.
+      if (property.string_indexing_config().tokenizer_type() ==
+          StringIndexingConfig::TokenizerType::RFC822) {
+        property.clear_string_indexing_config();
+        // NOTE(review): assumes every RFC822 property was occupying exactly
+        // one indexed section - confirm for properties whose match type was
+        // already unset.
+        --num_indexed_sections;
+        property_indexed_id_count_map.erase(property.property_name());
+      }
+    }
+
+    // Step 2. If there are any types that exceed the old indexed property
+    // limit, then mark indexed properties as unindexed until we're back under
+    // the limit.
+    if (num_indexed_sections <= kOldTotalNumSections) {
+      continue;
+    }
+
+    // We expect that the last properties were the ones added most recently and
+    // are the least crucial, so we do removal in reverse order. This is a bit
+    // arbitrary, but we don't really have sufficient information to make this
+    // judgment anyways.
+    for (auto itr = type->mutable_properties()->rbegin();
+         itr != type->mutable_properties()->rend(); ++itr) {
+      // Properties without an entry consume no indexed ids (never indexed, or
+      // already handled in step 1), so clearing them would not help.
+      auto indexed_count_itr =
+          property_indexed_id_count_map.find(itr->property_name());
+      if (indexed_count_itr == property_indexed_id_count_map.end()) {
+        continue;
+      }
+
+      // Mark this property as unindexed and subtract all indexed property ids
+      // consumed by this property.
+      PropertyConfigProto& property = *itr;
+      property.clear_document_indexing_config();
+      property.clear_string_indexing_config();
+      property.clear_integer_indexing_config();
+      num_indexed_sections -= indexed_count_itr->second;
+      if (num_indexed_sections <= kOldTotalNumSections) {
+        break;
+      }
+    }
+  }
+  return BackupSchemaProducer(std::move(backup_schema));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/backup-schema-producer.h b/icing/schema/backup-schema-producer.h
new file mode 100644
index 0000000..61dcde6
--- /dev/null
+++ b/icing/schema/backup-schema-producer.h
@@ -0,0 +1,55 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_BACKUP_SCHEMA_PRODUCER_H_
+#define ICING_SCHEMA_BACKUP_SCHEMA_PRODUCER_H_
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/section-manager.h"
+#include "icing/schema/section.h"
+
+namespace icing {
+namespace lib {
+
+// Holds a backup copy of a schema, when one is needed. The constructors are
+// private; obtain an instance through Create().
+class BackupSchemaProducer {
+ public:
+ // Creates a BackupSchemaProducer based off of schema.
+ // If schema doesn't require a backup schema (because it is fully
+ // rollback-proof) then no copies will be made and `is_backup_necessary` will
+ // return false.
+ // If schema *does* require a backup schema, then `is_backup_necessary` will
+ // return true and the backup schema can be retrieved by calling `Produce`.
+ // Note that, despite its name, `type_manager` is a SectionManager.
+ // Returns:
+ // - On success, a BackupSchemaProducer
+ // - INTERNAL_ERROR if the schema is inconsistent with the type_manager.
+ static libtextclassifier3::StatusOr<BackupSchemaProducer> Create(
+ const SchemaProto& schema, const SectionManager& type_manager);
+
+ // Moves the cached backup schema out of this producer and returns it.
+ // The method is rvalue-qualified, so it has to be invoked on an rvalue, e.g.
+ // `std::move(producer).Produce()`.
+ SchemaProto Produce() && { return std::move(cached_schema_); }
+
+ // Returns true iff the cached schema contains at least one type.
+ bool is_backup_necessary() const { return !cached_schema_.types().empty(); }
+
+ private:
+ // Leaves cached_schema_ empty, so is_backup_necessary() returns false.
+ BackupSchemaProducer() = default;
+ // Takes ownership of `schema` and stores it as the cached backup schema.
+ explicit BackupSchemaProducer(SchemaProto&& schema)
+ : cached_schema_(std::move(schema)) {}
+
+ // The schema handed out by Produce(); has no types when no backup is needed.
+ SchemaProto cached_schema_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_BACKUP_SCHEMA_PRODUCER_H_
diff --git a/icing/schema/backup-schema-producer_test.cc b/icing/schema/backup-schema-producer_test.cc
new file mode 100644
index 0000000..424fec0
--- /dev/null
+++ b/icing/schema/backup-schema-producer_test.cc
@@ -0,0 +1,630 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/backup-schema-producer.h"
+
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-type-manager.h"
+#include "icing/schema/schema-util.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+
+// Fixture that provisions an on-disk scratch directory for every test (used as
+// the storage location passed to DynamicTrieKeyMapper::Create) and removes it
+// again once the test has finished.
+class BackupSchemaProducerTest : public ::testing::Test {
+ protected:
+ // Creates <test temp dir>/icing/schema_store. A creation failure is fatal so
+ // that a test never proceeds against a missing directory; this mirrors the
+ // assertion TearDown makes on directory deletion.
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing";
+ schema_store_dir_ = test_dir_ + "/schema_store";
+ ASSERT_TRUE(
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()));
+ }
+
+ // Deletes test_dir_ and everything underneath it.
+ void TearDown() override {
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
+ }
+
+ Filesystem filesystem_;
+ // Root scratch directory for the current test.
+ std::string test_dir_;
+ // Subdirectory of test_dir_ handed to DynamicTrieKeyMapper::Create.
+ std::string schema_store_dir_;
+};
+
+// A schema without any types is expected to need no backup schema.
+TEST_F(BackupSchemaProducerTest, EmptySchema) {
+ SchemaProto empty;
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(empty, &type_config_map);
+ // Nothing is put into the mapper because the schema defines no types.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(empty,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(false));
+}
+
+// A schema in which no property is indexed (plain data types only, and a
+// document property with index_nested_properties=false) is expected to need
+// no backup schema.
+TEST_F(BackupSchemaProducerTest, NoIndexedPropertySchema) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .SetDataType(TYPE_INT64)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop3")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "TypeA", /*index_nested_properties=*/false))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop4")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataType(TYPE_STRING)))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ // Schema type ids are assigned by hand for the two types defined above.
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(false));
+}
+
+// A schema with only a handful of indexed properties, none of which uses the
+// RFC822 tokenizer, is expected to need no backup schema.
+TEST_F(BackupSchemaProducerTest, RollbackCompatibleSchema) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop2")
+ .SetCardinality(CARDINALITY_REQUIRED)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop3")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument(
+ "TypeA", /*index_nested_properties=*/true))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop4")
+ .SetCardinality(CARDINALITY_REPEATED)
+ .SetDataTypeString(TERM_MATCH_EXACT,
+ TOKENIZER_VERBATIM)))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ // Schema type ids are assigned by hand for the two types defined above.
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(false));
+}
+
+// A string property indexed with the RFC822 tokenizer is expected to force a
+// backup schema in which that property is a plain, unindexed string.
+TEST_F(BackupSchemaProducerTest, RemoveRfc822) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ // Produce() is rvalue-qualified, hence the std::move on the producer.
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ // Same type and property, but declared with SetDataType only, i.e. without
+ // any string indexing config.
+ SchemaProto expected_backup =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING)))
+ .Build();
+ EXPECT_THAT(backup, testing::EqualsProto(expected_backup));
+}
+
+// A type with 20 indexed string properties is expected to yield a backup
+// schema in which only the first 16 declared properties remain indexed.
+TEST_F(BackupSchemaProducerTest, MakeExtraStringIndexedPropertiesUnindexed) {
+ // The builder is reused for every property; only the name changes per call.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(indexed_string_property_builder.SetName("prop5"))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .AddProperty(indexed_string_property_builder.SetName("prop10"))
+ .AddProperty(indexed_string_property_builder.SetName("prop11"))
+ .AddProperty(indexed_string_property_builder.SetName("prop12"))
+ .AddProperty(indexed_string_property_builder.SetName("prop13"))
+ .AddProperty(indexed_string_property_builder.SetName("prop14"))
+ .AddProperty(indexed_string_property_builder.SetName("prop15"))
+ .AddProperty(indexed_string_property_builder.SetName("prop16"))
+ .AddProperty(indexed_string_property_builder.SetName("prop17"))
+ .AddProperty(indexed_string_property_builder.SetName("prop18"))
+ .AddProperty(indexed_string_property_builder.SetName("prop19"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ // prop0-prop15 are expected to stay indexed; prop16-prop19, the last four
+ // declared, are expected to become plain strings without indexing config.
+ PropertyConfigBuilder unindexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING);
+ SchemaTypeConfigProto expected_type =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(indexed_string_property_builder.SetName("prop5"))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .AddProperty(indexed_string_property_builder.SetName("prop10"))
+ .AddProperty(indexed_string_property_builder.SetName("prop11"))
+ .AddProperty(indexed_string_property_builder.SetName("prop12"))
+ .AddProperty(indexed_string_property_builder.SetName("prop13"))
+ .AddProperty(indexed_string_property_builder.SetName("prop14"))
+ .AddProperty(indexed_string_property_builder.SetName("prop15"))
+ .AddProperty(unindexed_string_property_builder.SetName("prop16"))
+ .AddProperty(unindexed_string_property_builder.SetName("prop17"))
+ .AddProperty(unindexed_string_property_builder.SetName("prop18"))
+ .AddProperty(unindexed_string_property_builder.SetName("prop19"))
+ .Build();
+ SchemaProto expected_backup = SchemaBuilder().AddType(expected_type).Build();
+ EXPECT_THAT(backup, testing::EqualsProto(expected_backup));
+}
+
+// Integer counterpart of the string test: a type with 20 range-indexed int64
+// properties is expected to yield a backup schema in which only the first 16
+// declared properties remain indexed.
+TEST_F(BackupSchemaProducerTest, MakeExtraIntIndexedPropertiesUnindexed) {
+ // The builder is reused for every property; only the name changes per call.
+ PropertyConfigBuilder indexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE);
+ SchemaTypeConfigProto type =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_int_property_builder.SetName("prop0"))
+ .AddProperty(indexed_int_property_builder.SetName("prop1"))
+ .AddProperty(indexed_int_property_builder.SetName("prop2"))
+ .AddProperty(indexed_int_property_builder.SetName("prop3"))
+ .AddProperty(indexed_int_property_builder.SetName("prop4"))
+ .AddProperty(indexed_int_property_builder.SetName("prop5"))
+ .AddProperty(indexed_int_property_builder.SetName("prop6"))
+ .AddProperty(indexed_int_property_builder.SetName("prop7"))
+ .AddProperty(indexed_int_property_builder.SetName("prop8"))
+ .AddProperty(indexed_int_property_builder.SetName("prop9"))
+ .AddProperty(indexed_int_property_builder.SetName("prop10"))
+ .AddProperty(indexed_int_property_builder.SetName("prop11"))
+ .AddProperty(indexed_int_property_builder.SetName("prop12"))
+ .AddProperty(indexed_int_property_builder.SetName("prop13"))
+ .AddProperty(indexed_int_property_builder.SetName("prop14"))
+ .AddProperty(indexed_int_property_builder.SetName("prop15"))
+ .AddProperty(indexed_int_property_builder.SetName("prop16"))
+ .AddProperty(indexed_int_property_builder.SetName("prop17"))
+ .AddProperty(indexed_int_property_builder.SetName("prop18"))
+ .AddProperty(indexed_int_property_builder.SetName("prop19"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ // prop0-prop15 are expected to stay indexed; prop16-prop19, the last four
+ // declared, are expected to become plain int64s without indexing config.
+ PropertyConfigBuilder unindexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_INT64);
+ SchemaTypeConfigProto expected_type =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_int_property_builder.SetName("prop0"))
+ .AddProperty(indexed_int_property_builder.SetName("prop1"))
+ .AddProperty(indexed_int_property_builder.SetName("prop2"))
+ .AddProperty(indexed_int_property_builder.SetName("prop3"))
+ .AddProperty(indexed_int_property_builder.SetName("prop4"))
+ .AddProperty(indexed_int_property_builder.SetName("prop5"))
+ .AddProperty(indexed_int_property_builder.SetName("prop6"))
+ .AddProperty(indexed_int_property_builder.SetName("prop7"))
+ .AddProperty(indexed_int_property_builder.SetName("prop8"))
+ .AddProperty(indexed_int_property_builder.SetName("prop9"))
+ .AddProperty(indexed_int_property_builder.SetName("prop10"))
+ .AddProperty(indexed_int_property_builder.SetName("prop11"))
+ .AddProperty(indexed_int_property_builder.SetName("prop12"))
+ .AddProperty(indexed_int_property_builder.SetName("prop13"))
+ .AddProperty(indexed_int_property_builder.SetName("prop14"))
+ .AddProperty(indexed_int_property_builder.SetName("prop15"))
+ .AddProperty(unindexed_int_property_builder.SetName("prop16"))
+ .AddProperty(unindexed_int_property_builder.SetName("prop17"))
+ .AddProperty(unindexed_int_property_builder.SetName("prop18"))
+ .AddProperty(unindexed_int_property_builder.SetName("prop19"))
+ .Build();
+ SchemaProto expected_backup = SchemaBuilder().AddType(expected_type).Build();
+ EXPECT_THAT(backup, testing::EqualsProto(expected_backup));
+}
+
+// TypeA nests TypeB (10 indexed string properties) twice with nested indexing
+// enabled. The backup schema is expected to drop indexing for the
+// last-declared document property (propB) only.
+TEST_F(BackupSchemaProducerTest, MakeExtraDocumentIndexedPropertiesUnindexed) {
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto typeB =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(indexed_string_property_builder.SetName("prop5"))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .Build();
+
+ // Each of TypeA's two document properties pulls in all of TypeB's indexed
+ // properties.
+ PropertyConfigBuilder indexed_document_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("TypeB", /*index_nested_properties=*/true);
+ SchemaTypeConfigProto typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_document_property_builder.SetName("propA"))
+ .AddProperty(indexed_document_property_builder.SetName("propB"))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(typeA).AddType(typeB).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ // The expected unindexed document property is assembled by hand (data type
+ // plus schema type) instead of through SetDataTypeDocument - presumably so
+ // that no document indexing config is set on it at all.
+ PropertyConfigProto unindexed_document_property =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_DOCUMENT)
+ .Build();
+ unindexed_document_property.set_schema_type("TypeB");
+ PropertyConfigBuilder unindexed_document_property_builder(
+ unindexed_document_property);
+ // propA keeps nested indexing, propB loses it, and TypeB is unchanged.
+ SchemaTypeConfigProto expected_typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_document_property_builder.SetName("propA"))
+ .AddProperty(unindexed_document_property_builder.SetName("propB"))
+ .Build();
+ SchemaProto expected_backup =
+ SchemaBuilder().AddType(expected_typeA).AddType(typeB).Build();
+ EXPECT_THAT(backup, testing::EqualsProto(expected_backup));
+}
+
+// TypeA has 17 indexed string properties, one of which (propRfc, declared in
+// the middle) uses the RFC822 tokenizer. The backup schema is expected to
+// unindex propRfc only and leave the 16 other properties indexed, including
+// the ones declared last.
+TEST_F(BackupSchemaProducerTest, MakeRfcPropertiesUnindexedFirst) {
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .AddProperty(indexed_string_property_builder.SetName("prop10"))
+ .AddProperty(indexed_string_property_builder.SetName("prop11"))
+ .AddProperty(indexed_string_property_builder.SetName("prop12"))
+ .AddProperty(indexed_string_property_builder.SetName("prop13"))
+ .AddProperty(indexed_string_property_builder.SetName("prop14"))
+ .AddProperty(indexed_string_property_builder.SetName("prop15"))
+ .AddProperty(indexed_string_property_builder.SetName("prop16"))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(typeA).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ // Identical to typeA except that propRfc carries no string indexing config.
+ SchemaTypeConfigProto expected_typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_string_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_string_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_string_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_string_property_builder.SetName("prop9"))
+ .AddProperty(indexed_string_property_builder.SetName("prop10"))
+ .AddProperty(indexed_string_property_builder.SetName("prop11"))
+ .AddProperty(indexed_string_property_builder.SetName("prop12"))
+ .AddProperty(indexed_string_property_builder.SetName("prop13"))
+ .AddProperty(indexed_string_property_builder.SetName("prop14"))
+ .AddProperty(indexed_string_property_builder.SetName("prop15"))
+ .AddProperty(indexed_string_property_builder.SetName("prop16"))
+ .Build();
+ SchemaProto expected_backup = SchemaBuilder().AddType(expected_typeA).Build();
+ EXPECT_THAT(backup, testing::EqualsProto(expected_backup));
+}
+
+// Mixed string/int/document case across two types. TypeB (10 indexed
+// properties) is expected to be left untouched, while TypeA - 10 directly
+// indexed properties plus TypeB nested through propI - is expected to have its
+// last three declared properties (propI, propJ, propK) unindexed.
+TEST_F(BackupSchemaProducerTest, MakeExtraPropertiesUnindexedMultipleTypes) {
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ PropertyConfigBuilder indexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE);
+ // TypeB alternates indexed string and indexed int properties.
+ SchemaTypeConfigProto typeB =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeB")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(indexed_int_property_builder.SetName("prop1"))
+ .AddProperty(indexed_string_property_builder.SetName("prop2"))
+ .AddProperty(indexed_int_property_builder.SetName("prop3"))
+ .AddProperty(indexed_string_property_builder.SetName("prop4"))
+ .AddProperty(indexed_int_property_builder.SetName("prop5"))
+ .AddProperty(indexed_string_property_builder.SetName("prop6"))
+ .AddProperty(indexed_int_property_builder.SetName("prop7"))
+ .AddProperty(indexed_string_property_builder.SetName("prop8"))
+ .AddProperty(indexed_int_property_builder.SetName("prop9"))
+ .Build();
+
+ PropertyConfigBuilder indexed_document_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("TypeB", /*index_nested_properties=*/true);
+ // propI nests TypeB with nested indexing enabled, in addition to the ten
+ // directly indexed properties declared on TypeA itself.
+ SchemaTypeConfigProto typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("propA"))
+ .AddProperty(indexed_int_property_builder.SetName("propB"))
+ .AddProperty(indexed_string_property_builder.SetName("propC"))
+ .AddProperty(indexed_int_property_builder.SetName("propD"))
+ .AddProperty(indexed_string_property_builder.SetName("propE"))
+ .AddProperty(indexed_int_property_builder.SetName("propF"))
+ .AddProperty(indexed_string_property_builder.SetName("propG"))
+ .AddProperty(indexed_int_property_builder.SetName("propH"))
+ .AddProperty(indexed_document_property_builder.SetName("propI"))
+ .AddProperty(indexed_string_property_builder.SetName("propJ"))
+ .AddProperty(indexed_int_property_builder.SetName("propK"))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(typeA).AddType(typeB).Build();
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ SchemaUtil::BuildTypeConfigMap(schema, &type_config_map);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper,
+ DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_,
+ /*maximum_size_bytes=*/10000));
+ ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk());
+ ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaTypeManager> schema_type_manager,
+ SchemaTypeManager::Create(type_config_map, type_id_mapper.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ BackupSchemaProducer backup_producer,
+ BackupSchemaProducer::Create(schema,
+ schema_type_manager->section_manager()));
+ EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true));
+ SchemaProto backup = std::move(backup_producer).Produce();
+
+ // Unindexed counterparts of the three property kinds used above.
+ PropertyConfigBuilder unindexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING);
+ PropertyConfigBuilder unindexed_int_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_INT64);
+ PropertyConfigProto unindexed_document_property =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_DOCUMENT)
+ .Build();
+ unindexed_document_property.set_schema_type("TypeB");
+ PropertyConfigBuilder unindexed_document_property_builder(
+ unindexed_document_property);
+
+ // propA-propH stay indexed; propI, propJ and propK become unindexed.
+ SchemaTypeConfigProto expected_typeA =
+ SchemaTypeConfigBuilder()
+ .SetType("TypeA")
+ .AddProperty(indexed_string_property_builder.SetName("propA"))
+ .AddProperty(indexed_int_property_builder.SetName("propB"))
+ .AddProperty(indexed_string_property_builder.SetName("propC"))
+ .AddProperty(indexed_int_property_builder.SetName("propD"))
+ .AddProperty(indexed_string_property_builder.SetName("propE"))
+ .AddProperty(indexed_int_property_builder.SetName("propF"))
+ .AddProperty(indexed_string_property_builder.SetName("propG"))
+ .AddProperty(indexed_int_property_builder.SetName("propH"))
+ .AddProperty(unindexed_document_property_builder.SetName("propI"))
+ .AddProperty(unindexed_string_property_builder.SetName("propJ"))
+ .AddProperty(unindexed_int_property_builder.SetName("propK"))
+ .Build();
+ // TypeB is expected to come back exactly as it was declared.
+ SchemaProto expected_backup =
+ SchemaBuilder().AddType(expected_typeA).AddType(typeB).Build();
+ EXPECT_THAT(backup, testing::EqualsProto(expected_backup));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-property-iterator.cc b/icing/schema/schema-property-iterator.cc
index 455b61b..e1078c2 100644
--- a/icing/schema/schema-property-iterator.cc
+++ b/icing/schema/schema-property-iterator.cc
@@ -58,9 +58,13 @@ libtextclassifier3::Status SchemaPropertyIterator::Advance() {
if (parent_type_config_names_.count(
nested_type_config_iter->second.schema_type()) > 0) {
- // Cycle detected. Abort the iteration.
- return absl_ports::InvalidArgumentError(
- "Detect nested schema cycle dependency");
+ // Cycle detected. The schema definition is guaranteed to be valid here
+ // since it must have already been validated during SchemaUtil::Validate,
+ // which would have rejected any schema with bad cycles.
+ //
+ // We do not need to iterate this type further so we simply move on to
+ // other properties in the parent type.
+ continue;
}
std::string curr_property_path = levels_.back().GetCurrentPropertyPath();
diff --git a/icing/schema/schema-property-iterator.h b/icing/schema/schema-property-iterator.h
index 696dc72..f60a56e 100644
--- a/icing/schema/schema-property-iterator.h
+++ b/icing/schema/schema-property-iterator.h
@@ -33,6 +33,9 @@ namespace lib {
// (non-document-type) properties will be returned, and for document type
// properties, the iterator will traverse down to the next nested level of
// schema.
+//
+// REQUIRED: The schema in which this SchemaTypeConfigProto is defined must have
+// already passed the validation step during SetSchema.
class SchemaPropertyIterator {
public:
explicit SchemaPropertyIterator(
diff --git a/icing/schema/schema-property-iterator_test.cc b/icing/schema/schema-property-iterator_test.cc
index e14eabb..080d574 100644
--- a/icing/schema/schema-property-iterator_test.cc
+++ b/icing/schema/schema-property-iterator_test.cc
@@ -15,13 +15,10 @@
#include "icing/schema/schema-property-iterator.h"
#include <string>
-#include <utility>
-#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include "icing/portable/equals-proto.h"
#include "icing/proto/schema.pb.h"
#include "icing/schema-builder.h"
#include "icing/schema/schema-util.h"
@@ -239,53 +236,6 @@ TEST(SchemaPropertyIteratorTest,
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
-TEST(SchemaPropertyIteratorTest,
- SchemaTypeConfigWithCycleDependencyShouldGetInvalidArgumentError) {
- std::string schema_type_name1 = "SchemaOne";
- std::string schema_type_name2 = "SchemaTwo";
-
- SchemaTypeConfigProto schema_type_config1 =
- SchemaTypeConfigBuilder()
- .SetType(schema_type_name1)
- .AddProperty(
- PropertyConfigBuilder().SetName("Foo").SetDataTypeDocument(
- schema_type_name2, /*index_nested_properties=*/true))
- .Build();
- SchemaTypeConfigProto schema_type_config2 =
- SchemaTypeConfigBuilder()
- .SetType(schema_type_name2)
- .AddProperty(
- PropertyConfigBuilder().SetName("Bar").SetDataTypeDocument(
- schema_type_name1, /*index_nested_properties=*/true))
- .Build();
- SchemaUtil::TypeConfigMap type_config_map = {
- {schema_type_name1, schema_type_config1},
- {schema_type_name2, schema_type_config2}};
-
- SchemaPropertyIterator iterator(schema_type_config1, type_config_map);
- EXPECT_THAT(iterator.Advance(),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
-TEST(SchemaPropertyIteratorTest,
- SchemaTypeConfigWithSelfDependencyShouldGetInvalidArgumentError) {
- std::string schema_type_name = "SchemaOne";
-
- SchemaTypeConfigProto schema_type_config =
- SchemaTypeConfigBuilder()
- .SetType(schema_type_name)
- .AddProperty(
- PropertyConfigBuilder().SetName("Foo").SetDataTypeDocument(
- schema_type_name, /*index_nested_properties=*/true))
- .Build();
- SchemaUtil::TypeConfigMap type_config_map = {
- {schema_type_name, schema_type_config}};
-
- SchemaPropertyIterator iterator(schema_type_config, type_config_map);
- EXPECT_THAT(iterator.Advance(),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
TEST(SchemaPropertyIteratorTest, NestedIndexable) {
std::string schema_type_name1 = "SchemaOne";
std::string schema_type_name2 = "SchemaTwo";
@@ -464,6 +414,435 @@ TEST(SchemaPropertyIteratorTest, NestedIndexable) {
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
+TEST(SchemaPropertyIteratorTest, SingleLevelCycle) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+
+ // Create schema with A -> B -> B -> B...
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a}, {schema_b, schema_type_config_b}};
+
+ // Order of iteration for schema A:
+ // {"schemaAprop1.schemaBprop2", "schemaAprop2"}, both indexable
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Order of iteration for schema B:
+ // {"schemaBprop2"}, indexable.
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST(SchemaPropertyIteratorTest, MultipleLevelCycle) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+ std::string schema_c = "C";
+
+ // Create schema with A -> B -> C -> A -> B -> C...
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_c, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_c =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_c)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaCprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaCprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a},
+ {schema_b, schema_type_config_b},
+ {schema_c, schema_type_config_c}};
+
+ // Order of iteration for schema A:
+ // {"schemaAprop1.schemaBprop1.schemaCprop2", "schemaAprop1.schemaBprop2",
+ // "schemaAprop2"}, all indexable
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Order of iteration for schema B:
+ // {"schemaBprop1.schemaCprop1.schemaAprop2", "schemaBprop1.schemaCprop2",
+ // "schemaBprop2"}
+ //
+ // Indexable properties: {"schemaBprop1.schemaCprop2", "schemaBprop2"}
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Order of iteration for schema C:
+ // {"schemaCprop1.schemaAprop1.schemaBprop2", "schemaCprop1.schemaAprop2",
+ // "schemaCprop2"}
+ //
+ // Indexable properties: {"schemaCprop2"}
+ SchemaPropertyIterator schema_c_iterator(schema_type_config_c,
+ type_config_map);
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST(SchemaPropertyIteratorTest, MultipleCycles) {
+ std::string schema_a = "A";
+ std::string schema_b = "B";
+ std::string schema_c = "C";
+ std::string schema_d = "D";
+
+ // Create schema with D <-> A -> B -> C -> A -> B -> C -> A...
+ // Schema type A has two cycles: A-B-C-A and A-D-A
+ SchemaTypeConfigProto schema_type_config_a =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_a)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop1")
+ .SetDataTypeDocument(
+ schema_b, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaAprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaAprop3")
+ .SetDataTypeDocument(
+ schema_d, /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_b =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_b)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaBprop1")
+ .SetDataTypeDocument(
+ schema_c, /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaBprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_c =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_c)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaCprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaCprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto schema_type_config_d =
+ SchemaTypeConfigBuilder()
+ .SetType(schema_d)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("schemaDprop1")
+ .SetDataTypeDocument(
+ schema_a, /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("schemaDprop2")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ SchemaUtil::TypeConfigMap type_config_map = {
+ {schema_a, schema_type_config_a},
+ {schema_b, schema_type_config_b},
+ {schema_c, schema_type_config_c},
+ {schema_d, schema_type_config_d}};
+
+ // Order of iteration for schema A:
+ // {"schemaAprop1.schemaBprop1.schemaCprop2", "schemaAprop1.schemaBprop2",
+ // "schemaAprop2", "schemaAprop3.schemaDprop2"}, all indexable
+ SchemaPropertyIterator schema_a_iterator(schema_type_config_a,
+ type_config_map);
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(),
+ Eq("schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_a_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Order of iteration for schema B:
+ // {"schemaBprop1.schemaCprop1.schemaAprop2",
+ // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2",
+ // "schemaBprop1.schemaCprop2", "schemaBprop2"}
+ //
+ // Indexable properties: {"schemaBprop1.schemaCprop2", "schemaBprop2"}
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b,
+ type_config_map);
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(),
+ Eq("schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2"));
+ EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_b_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Order of iteration for schema C:
+ // {"schemaCprop1.schemaAprop1.schemaBprop2", "schemaCprop1.schemaAprop2",
+ // "schemaCprop1.schemaAprop3.schemaDprop2", "schemaCprop2"}
+ //
+ // Indexable properties: {"schemaCprop2"}
+ SchemaPropertyIterator schema_c_iterator(schema_type_config_c,
+ type_config_map);
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(),
+ Eq("schemaCprop1.schemaAprop3.schemaDprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_c_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2"));
+ EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_c_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Order of iteration for schema D:
+ // {"schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2",
+ // "schemaDprop1.schemaAprop1.schemaBprop2", "schemaDprop1.schemaAprop2",
+ // "schemaDprop2"}
+ //
+ // Indexable properties: {"schemaDprop2"}
+ SchemaPropertyIterator schema_d_iterator(schema_type_config_d,
+ type_config_map);
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_c.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop1.schemaBprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_b.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(),
+ Eq("schemaDprop1.schemaAprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_a.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentNestedIndexable(), IsFalse());
+
+ EXPECT_THAT(schema_d_iterator.Advance(), IsOk());
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(), Eq("schemaDprop2"));
+ EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(),
+ EqualsProto(schema_type_config_d.properties(1)));
+ EXPECT_THAT(schema_d_iterator.GetCurrentNestedIndexable(), IsTrue());
+
+ EXPECT_THAT(schema_d_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index 065157e..bcc7c2c 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -15,11 +15,14 @@
#include "icing/schema/schema-store.h"
#include <algorithm>
+#include <cinttypes>
#include <cstdint>
+#include <limits>
#include <memory>
#include <string>
#include <string_view>
#include <unordered_map>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -30,12 +33,16 @@
#include "icing/file/destructible-directory.h"
#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
+#include "icing/file/version-util.h"
#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
#include "icing/proto/storage.pb.h"
+#include "icing/schema/backup-schema-producer.h"
#include "icing/schema/joinable-property.h"
+#include "icing/schema/property-util.h"
#include "icing/schema/schema-type-manager.h"
#include "icing/schema/schema-util.h"
#include "icing/schema/section.h"
@@ -52,6 +59,7 @@ namespace {
constexpr char kSchemaStoreHeaderFilename[] = "schema_store_header";
constexpr char kSchemaFilename[] = "schema.pb";
+constexpr char kOverlaySchemaFilename[] = "overlay_schema.pb";
constexpr char kSchemaTypeMapperFilename[] = "schema_type_mapper";
// A DynamicTrieKeyMapper stores its data across 3 arrays internally. Giving
@@ -59,15 +67,19 @@ constexpr char kSchemaTypeMapperFilename[] = "schema_type_mapper";
// 384KiB.
constexpr int32_t kSchemaTypeMapperMaxSize = 3 * 128 * 1024; // 384 KiB
-const std::string MakeHeaderFilename(const std::string& base_dir) {
+std::string MakeHeaderFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kSchemaStoreHeaderFilename);
}
-const std::string MakeSchemaFilename(const std::string& base_dir) {
+std::string MakeSchemaFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kSchemaFilename);
}
-const std::string MakeSchemaTypeMapperFilename(const std::string& base_dir) {
+std::string MakeOverlaySchemaFilename(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kOverlaySchemaFilename);
+}
+
+std::string MakeSchemaTypeMapperFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kSchemaTypeMapperFilename);
}
@@ -108,6 +120,61 @@ std::unordered_set<SchemaTypeId> SchemaTypeIdsChanged(
} // namespace
+/* static */ libtextclassifier3::StatusOr<SchemaStore::Header>
+SchemaStore::Header::Read(const Filesystem* filesystem,
+ const std::string& path) {
+ Header header;
+ ScopedFd sfd(filesystem->OpenForRead(path.c_str()));
+ if (!sfd.is_valid()) {
+ return absl_ports::NotFoundError("SchemaStore header doesn't exist");
+ }
+
+ // A file of exactly sizeof(LegacyHeader) bytes must hold a LegacyHeader.
+ int64_t file_size = filesystem->GetFileSize(sfd.get());
+ if (file_size == sizeof(LegacyHeader)) {
+ LegacyHeader legacy_header;
+ if (!filesystem->Read(path.c_str(), &legacy_header,
+ sizeof(legacy_header))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Couldn't read: ", path));
+ }
+ if (legacy_header.magic != Header::kMagic) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Invalid header kMagic for file: ", path));
+ }
+ header.set_checksum(legacy_header.checksum);
+ } else if (file_size == sizeof(Header)) {
+ if (!filesystem->Read(path.c_str(), &header, sizeof(header))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Couldn't read: ", path));
+ }
+ if (header.magic() != Header::kMagic) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Invalid header kMagic for file: ", path));
+ }
+ } else {
+ int legacy_header_size = sizeof(LegacyHeader);
+ int header_size = sizeof(Header);
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Unexpected header size %" PRId64 ". Expected %d or %d", file_size,
+ legacy_header_size, header_size));
+ }
+ return header;
+}
+
+libtextclassifier3::Status SchemaStore::Header::Write(
+ const Filesystem* filesystem, const std::string& path) {
+ ScopedFd scoped_fd(filesystem->OpenForWrite(path.c_str()));
+ // This should overwrite the header.
+ if (!scoped_fd.is_valid() ||
+ !filesystem->Write(scoped_fd.get(), this, sizeof(*this)) ||
+ !filesystem->DataSync(scoped_fd.get())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to write SchemaStore header: ", path));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, InitializeStatsProto* initialize_stats) {
@@ -140,6 +207,106 @@ libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create(
return schema_store;
}
+/* static */ libtextclassifier3::Status SchemaStore::DiscardOverlaySchema(
+ const Filesystem* filesystem, const std::string& base_dir, Header& header) {
+ std::string header_filename = MakeHeaderFilename(base_dir);
+ if (header.overlay_created()) {
+ header.SetOverlayInfo(
+ /*overlay_created=*/false,
+ /*min_overlay_version_compatibility=*/ std::numeric_limits<
+ int32_t>::max());
+ ICING_RETURN_IF_ERROR(header.Write(filesystem, header_filename));
+ }
+ std::string schema_overlay_filename = MakeOverlaySchemaFilename(base_dir);
+ if (!filesystem->DeleteFile(schema_overlay_filename.c_str())) {
+ return absl_ports::InternalError(
+ "Unable to delete stale schema overlay file.");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::Status SchemaStore::MigrateSchema(
+ const Filesystem* filesystem, const std::string& base_dir,
+ version_util::StateChange version_state_change, int32_t new_version) {
+ if (!filesystem->DirectoryExists(base_dir.c_str())) {
+ // Situations in which the schema store directory doesn't exist:
+ // - A new Icing instance is being initialized: there is nothing to do
+ // now. The directory will be created later.
+ // - The schema store has been lost: there is nothing we can do now. This
+ // will be handled later during initialization.
+ //
+ // Therefore, simply return OK here.
+ return libtextclassifier3::Status::OK;
+ }
+
+ std::string overlay_schema_filename = MakeOverlaySchemaFilename(base_dir);
+ if (!filesystem->FileExists(overlay_schema_filename.c_str())) {
+ // The overlay schema file doesn't exist, so there is no overlay to keep
+ // or discard.
+ return libtextclassifier3::Status::OK;
+ }
+
+ std::string header_filename = MakeHeaderFilename(base_dir);
+ libtextclassifier3::StatusOr<Header> header_or;
+ switch (version_state_change) {
+ // No action is necessary for upgrades or when the version is unchanged.
+ // The data that was produced by the previous version is fully compatible
+ // with this version and there's no stale data for us to clean up.
+ // The same is true for a normal rollforward. A normal rollforward implies
+ // that the previous version was one that understood the concept of the
+ // overlay schema and would have already discarded it if it was unusable.
+ case version_util::StateChange::kVersionZeroUpgrade:
+ // fallthrough
+ case version_util::StateChange::kUpgrade:
+ // fallthrough
+ case version_util::StateChange::kRollForward:
+ // fallthrough
+ case version_util::StateChange::kCompatible:
+ return libtextclassifier3::Status::OK;
+ case version_util::StateChange::kVersionZeroRollForward:
+ // We've rolled forward. The schema overlay file (known to exist at this
+ // point) is possibly stale. We must throw it out.
+ header_or = Header::Read(filesystem, header_filename);
+ if (!header_or.ok()) {
+ return header_or.status();
+ }
+ return SchemaStore::DiscardOverlaySchema(filesystem, base_dir,
+ header_or.ValueOrDie());
+ case version_util::StateChange::kRollBack:
+ header_or = Header::Read(filesystem, header_filename);
+ if (!header_or.ok()) {
+ return header_or.status();
+ }
+ if (header_or.ValueOrDie().min_overlay_version_compatibility() <=
+ new_version) {
+ // We've been rolled back, but the overlay schema claims that it
+ // supports this version. So we can safely return.
+ return libtextclassifier3::Status::OK;
+ }
+ // We've been rolled back to a version that the overlay schema doesn't
+ // support. We must throw it out.
+ return SchemaStore::DiscardOverlaySchema(filesystem, base_dir,
+ header_or.ValueOrDie());
+ case version_util::StateChange::kUndetermined:
+ // It's not clear what version we're on, but the base schema should always
+ // be safe to use. Throw out the overlay.
+ header_or = Header::Read(filesystem, header_filename);
+ if (!header_or.ok()) {
+ return header_or.status();
+ }
+ return SchemaStore::DiscardOverlaySchema(filesystem, base_dir,
+ header_or.ValueOrDie());
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+/* static */ libtextclassifier3::Status SchemaStore::DiscardDerivedFiles(
+ const Filesystem* filesystem, const std::string& base_dir) {
+ // The schema type mapper is the only derived file deleted here.
+ return DynamicTrieKeyMapper<SchemaTypeId>::Delete(
+ *filesystem, MakeSchemaTypeMapperFilename(base_dir));
+}
+
SchemaStore::SchemaStore(const Filesystem* filesystem, std::string base_dir,
const Clock* clock)
: filesystem_(filesystem),
@@ -158,6 +325,7 @@ SchemaStore::~SchemaStore() {
}
libtextclassifier3::Status SchemaStore::Initialize(SchemaProto new_schema) {
+ ICING_RETURN_IF_ERROR(LoadSchema());
if (!absl_ports::IsNotFound(GetSchema().status())) {
return absl_ports::FailedPreconditionError(
"Incorrectly tried to initialize schema store with a new schema, when "
@@ -165,11 +333,13 @@ libtextclassifier3::Status SchemaStore::Initialize(SchemaProto new_schema) {
}
ICING_RETURN_IF_ERROR(schema_file_->Write(
std::make_unique<SchemaProto>(std::move(new_schema))));
- return InitializeInternal(/*initialize_stats=*/nullptr);
+ return InitializeInternal(/*create_overlay_if_necessary=*/true,
+ /*initialize_stats=*/nullptr);
}
libtextclassifier3::Status SchemaStore::Initialize(
InitializeStatsProto* initialize_stats) {
+ ICING_RETURN_IF_ERROR(LoadSchema());
auto schema_proto_or = GetSchema();
if (absl_ports::IsNotFound(schema_proto_or.status())) {
// Don't have an existing schema proto, that's fine
@@ -178,11 +348,69 @@ libtextclassifier3::Status SchemaStore::Initialize(
// Real error when trying to read the existing schema
return schema_proto_or.status();
}
- return InitializeInternal(initialize_stats);
+ return InitializeInternal(/*create_overlay_if_necessary=*/false,
+ initialize_stats);
+}
+
+libtextclassifier3::Status SchemaStore::LoadSchema() {
+ libtextclassifier3::StatusOr<Header> header_or =
+ Header::Read(filesystem_, MakeHeaderFilename(base_dir_));
+ bool header_exists = false;
+ if (!header_or.ok() && !absl_ports::IsNotFound(header_or.status())) {
+ return header_or.status();
+ } else if (!header_or.ok()) {
+ header_ = std::make_unique<Header>();
+ } else {
+ header_exists = true;
+ header_ = std::make_unique<Header>(std::move(header_or).ValueOrDie());
+ }
+
+ std::string overlay_schema_filename = MakeOverlaySchemaFilename(base_dir_);
+ bool overlay_schema_file_exists =
+ filesystem_->FileExists(overlay_schema_filename.c_str());
+
+ libtextclassifier3::Status base_schema_state = schema_file_->Read().status();
+ if (!base_schema_state.ok() && !absl_ports::IsNotFound(base_schema_state)) {
+ return base_schema_state;
+ }
+
+ // There are three valid cases:
+ // 1. Everything is missing. This is an empty schema store.
+ if (!base_schema_state.ok() && !overlay_schema_file_exists &&
+ !header_exists) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ // 2. There never was an overlay schema. The header exists, the base schema
+ // exists and the header says the overlay schema shouldn't exist.
+ if (base_schema_state.ok() && !overlay_schema_file_exists && header_exists &&
+ !header_->overlay_created()) {
+ // Nothing else to do. Just return safely.
+ return libtextclassifier3::Status::OK;
+ }
+
+ // 3. There is an overlay schema and a base schema and a header. The header
+ // says that the overlay schema should exist.
+ if (base_schema_state.ok() && overlay_schema_file_exists && header_exists &&
+ header_->overlay_created()) {
+ overlay_schema_file_ = std::make_unique<FileBackedProto<SchemaProto>>(
+ *filesystem_, MakeOverlaySchemaFilename(base_dir_));
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Something has gone wrong. We've lost part of the schema ground truth.
+ // Return an error.
+ bool overlay_created = header_->overlay_created();
+ bool base_schema_exists = base_schema_state.ok();
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Unable to properly load schema. Header {exists:%d, overlay_created:%d}, "
+ "base schema exists: %d, overlay_schema_exists: %d",
+ header_exists, overlay_created, base_schema_exists,
+ overlay_schema_file_exists));
}
libtextclassifier3::Status SchemaStore::InitializeInternal(
- InitializeStatsProto* initialize_stats) {
+ bool create_overlay_if_necessary, InitializeStatsProto* initialize_stats) {
if (!InitializeDerivedFiles().ok()) {
ICING_VLOG(3)
<< "Couldn't find derived files or failed to initialize them, "
@@ -192,7 +420,7 @@ libtextclassifier3::Status SchemaStore::InitializeInternal(
initialize_stats->set_schema_store_recovery_cause(
InitializeStatsProto::IO_ERROR);
}
- ICING_RETURN_IF_ERROR(RegenerateDerivedFiles());
+ ICING_RETURN_IF_ERROR(RegenerateDerivedFiles(create_overlay_if_necessary));
if (initialize_stats != nullptr) {
initialize_stats->set_schema_store_recovery_latency_ms(
regenerate_timer->GetElapsedMilliseconds());
@@ -208,24 +436,6 @@ libtextclassifier3::Status SchemaStore::InitializeInternal(
}
libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() {
- if (!HeaderExists()) {
- // Without a header, we don't know if things are consistent between each
- // other so the caller should just regenerate everything from ground truth.
- return absl_ports::InternalError("SchemaStore header doesn't exist");
- }
-
- SchemaStore::Header header;
- if (!filesystem_->Read(MakeHeaderFilename(base_dir_).c_str(), &header,
- sizeof(header))) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Couldn't read: ", MakeHeaderFilename(base_dir_)));
- }
-
- if (header.magic != SchemaStore::Header::kMagic) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Invalid header kMagic for file: ", MakeHeaderFilename(base_dir_)));
- }
-
ICING_ASSIGN_OR_RETURN(
schema_type_mapper_,
DynamicTrieKeyMapper<SchemaTypeId>::Create(
@@ -233,78 +443,105 @@ libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() {
kSchemaTypeMapperMaxSize));
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- if (checksum.Get() != header.checksum) {
+ if (checksum.Get() != header_->checksum()) {
return absl_ports::InternalError(
"Combined checksum of SchemaStore was inconsistent");
}
- // Update our in-memory data structures
- type_config_map_.clear();
- ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema());
- for (const SchemaTypeConfigProto& type_config : schema_proto->types()) {
- // Update our type_config_map_
- type_config_map_.emplace(type_config.schema_type(), type_config);
- }
- ICING_ASSIGN_OR_RETURN(
- schema_type_manager_,
- SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
-
+ BuildInMemoryCache();
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles() {
+// Rebuilds every derived structure from the schema returned by GetSchema():
+// 1. Resets schema_type_mapper_ and assigns each type an id in schema order.
+// 2. Rebuilds the in-memory caches via BuildInMemoryCache().
+// 3. If create_overlay_if_necessary is set and BackupSchemaProducer reports
+// that a backup is necessary, writes the current schema to the overlay
+// file, writes the produced backup schema to the base schema file, records
+// the overlay in the header and rebuilds the in-memory caches again.
+// 4. Recomputes the combined checksum and writes the header.
+//
+// Returns the first error encountered by any of these steps, otherwise the
+// status of the header write.
+libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles(
+ bool create_overlay_if_necessary) {
ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema());
ICING_RETURN_IF_ERROR(ResetSchemaTypeMapper());
- type_config_map_.clear();
for (const SchemaTypeConfigProto& type_config : schema_proto->types()) {
- // Update our type_config_map_
- type_config_map_.emplace(type_config.schema_type(), type_config);
-
// Assign a SchemaTypeId to the type
ICING_RETURN_IF_ERROR(schema_type_mapper_->Put(
type_config.schema_type(), schema_type_mapper_->num_keys()));
}
-
- ICING_ASSIGN_OR_RETURN(
- schema_type_manager_,
- SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
+ // Propagate failures instead of dropping them: schema_type_manager_ is
+ // dereferenced below and must have been (re)built successfully.
+ ICING_RETURN_IF_ERROR(BuildInMemoryCache());
+
+ if (create_overlay_if_necessary) {
+ ICING_ASSIGN_OR_RETURN(
+ BackupSchemaProducer producer,
+ BackupSchemaProducer::Create(*schema_proto,
+ schema_type_manager_->section_manager()));
+
+ if (producer.is_backup_necessary()) {
+ SchemaProto base_schema = std::move(producer).Produce();
+
+ // The overlay schema should be written to the overlay file location.
+ overlay_schema_file_ = std::make_unique<FileBackedProto<SchemaProto>>(
+ *filesystem_, MakeOverlaySchemaFilename(base_dir_));
+ // schema_proto points to a const SchemaProto, so this is a copy (a
+ // std::move here would silently degrade to a copy as well).
+ auto schema_ptr = std::make_unique<SchemaProto>(*schema_proto);
+ ICING_RETURN_IF_ERROR(overlay_schema_file_->Write(std::move(schema_ptr)));
+
+ // The base schema should be written to the original file
+ auto base_schema_ptr =
+ std::make_unique<SchemaProto>(std::move(base_schema));
+ ICING_RETURN_IF_ERROR(schema_file_->Write(std::move(base_schema_ptr)));
+
+ header_->SetOverlayInfo(
+ /*overlay_created=*/true,
+ /*min_overlay_version_compatibility=*/version_util::kVersionOne);
+ // Rebuild in memory data - references to the old schema will be invalid
+ // now.
+ ICING_RETURN_IF_ERROR(BuildInMemoryCache());
+ }
+ }
// Write the header
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
-
- return libtextclassifier3::Status::OK;
-}
-
-bool SchemaStore::HeaderExists() {
- if (!filesystem_->FileExists(MakeHeaderFilename(base_dir_).c_str())) {
- return false;
- }
-
- int64_t file_size =
- filesystem_->GetFileSize(MakeHeaderFilename(base_dir_).c_str());
-
- // If it's been truncated to size 0 before, we consider it to be a new file
- return file_size != 0 && file_size != Filesystem::kBadFileSize;
+ header_->set_checksum(checksum.Get());
+ return header_->Write(filesystem_, MakeHeaderFilename(base_dir_));
}
-libtextclassifier3::Status SchemaStore::UpdateHeader(const Crc32& checksum) {
- // Write the header
- SchemaStore::Header header;
- header.magic = SchemaStore::Header::kMagic;
- header.checksum = checksum.Get();
+// Rebuilds the in-memory lookup structures (reverse_schema_type_mapper_,
+// type_config_map_, schema_subtype_id_map_ and schema_type_manager_) from the
+// schema returned by GetSchema() and the type ids already stored in
+// schema_type_mapper_.
+//
+// Returns OK on success, otherwise the first error produced by GetSchema(),
+// BuildTransitiveInheritanceGraph(), schema_type_mapper_->Get() or
+// SchemaTypeManager::Create(). The maps are cleared before being refilled, so
+// an error partway through leaves them only partially populated.
+libtextclassifier3::Status SchemaStore::BuildInMemoryCache() {
+ ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema());
+ ICING_ASSIGN_OR_RETURN(
+ SchemaUtil::InheritanceMap inheritance_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(*schema_proto));
- ScopedFd scoped_fd(
- filesystem_->OpenForWrite(MakeHeaderFilename(base_dir_).c_str()));
- // This should overwrite the header.
- if (!scoped_fd.is_valid() ||
- !filesystem_->Write(scoped_fd.get(), &header, sizeof(header)) ||
- !filesystem_->DataSync(scoped_fd.get())) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Failed to write SchemaStore header: ", MakeHeaderFilename(base_dir_)));
+ reverse_schema_type_mapper_.clear();
+ type_config_map_.clear();
+ schema_subtype_id_map_.clear();
+ for (const SchemaTypeConfigProto& type_config : schema_proto->types()) {
+ std::string_view type_name = type_config.schema_type();
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId type_id,
+ schema_type_mapper_->Get(type_name));
+
+ // Build reverse_schema_type_mapper_
+ reverse_schema_type_mapper_.insert({type_id, std::string(type_name)});
+
+ // Build type_config_map_
+ type_config_map_.insert({std::string(type_name), type_config});
+
+ // Build schema_subtype_id_map_
+ std::unordered_set<SchemaTypeId>& subtype_id_set =
+ schema_subtype_id_map_[type_id];
+ // Find all child types
+ auto child_types_names = inheritance_map.find(type_name);
+ if (child_types_names != inheritance_map.end()) {
+ // +1 leaves room for the type itself, inserted after the loop.
+ subtype_id_set.reserve(child_types_names->second.size() + 1);
+ // Every listed child is added; is_direct_child is not consulted.
+ for (const auto& [child_type_name, is_direct_child] :
+ child_types_names->second) {
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId child_type_id,
+ schema_type_mapper_->Get(child_type_name));
+ subtype_id_set.insert(child_type_id);
+ }
+ }
+ // Every type is a subtype of itself.
+ subtype_id_set.insert(type_id);
}
+
+ // Build schema_type_manager_
+ ICING_ASSIGN_OR_RETURN(
+ schema_type_manager_,
+ SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get()));
return libtextclassifier3::Status::OK;
}
@@ -331,7 +568,8 @@ libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() {
}
libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const {
- auto schema_proto_or = GetSchema();
+ // Base schema checksum
+ auto schema_proto_or = schema_file_->Read();
if (absl_ports::IsNotFound(schema_proto_or.status())) {
return Crc32();
}
@@ -339,11 +577,23 @@ libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const {
Crc32 schema_checksum;
schema_checksum.Append(schema_proto->SerializeAsString());
+  // Overlay schema checksum. It only contributes when an overlay file is
+  // loaded and readable, and it must be computed from overlay_schema_file_
+  // (not schema_file_): otherwise the base schema is hashed twice and changes
+  // to the overlay never show up in the combined checksum.
+  Crc32 overlay_schema_checksum;
+  if (overlay_schema_file_ != nullptr) {
+    auto overlay_schema_proto_or = overlay_schema_file_->Read();
+    if (overlay_schema_proto_or.ok()) {
+      const SchemaProto* overlay_schema_proto =
+          overlay_schema_proto_or.ValueOrDie();
+      overlay_schema_checksum.Append(
+          overlay_schema_proto->SerializeAsString());
+    }
+  }
+
ICING_ASSIGN_OR_RETURN(Crc32 schema_type_mapper_checksum,
schema_type_mapper_->ComputeChecksum());
Crc32 total_checksum;
total_checksum.Append(std::to_string(schema_checksum.Get()));
+ if (overlay_schema_file_ != nullptr) {
+ total_checksum.Append(std::to_string(overlay_schema_checksum.Get()));
+ }
total_checksum.Append(std::to_string(schema_type_mapper_checksum.Get()));
return total_checksum;
@@ -351,6 +601,9 @@ libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const {
libtextclassifier3::StatusOr<const SchemaProto*> SchemaStore::GetSchema()
const {
+ // When an overlay schema file is loaded it takes precedence over the base
+ // schema file; otherwise the base schema is returned.
+ if (overlay_schema_file_ != nullptr) {
+ return overlay_schema_file_->Read();
+ }
return schema_file_->Read();
}
@@ -360,15 +613,19 @@ libtextclassifier3::StatusOr<const SchemaProto*> SchemaStore::GetSchema()
// SetSchema(SchemaProto&& new_schema)
libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult>
SchemaStore::SetSchema(const SchemaProto& new_schema,
- bool ignore_errors_and_delete_documents) {
- return SetSchema(SchemaProto(new_schema), ignore_errors_and_delete_documents);
+ bool ignore_errors_and_delete_documents,
+ bool allow_circular_schema_definitions) {
+ return SetSchema(SchemaProto(new_schema), ignore_errors_and_delete_documents,
+ allow_circular_schema_definitions);
}
libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult>
SchemaStore::SetSchema(SchemaProto&& new_schema,
- bool ignore_errors_and_delete_documents) {
- ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap new_dependent_map,
- SchemaUtil::Validate(new_schema));
+ bool ignore_errors_and_delete_documents,
+ bool allow_circular_schema_definitions) {
+ ICING_ASSIGN_OR_RETURN(
+ SchemaUtil::DependentMap new_dependent_map,
+ SchemaUtil::Validate(new_schema, allow_circular_schema_definitions));
SetSchemaResult result;
@@ -493,6 +750,10 @@ libtextclassifier3::Status SchemaStore::ApplySchemaChange(
// Manually set them to the correct paths.
base_dir_ = std::move(old_base_dir);
schema_file_->SetSwappedFilepath(MakeSchemaFilename(base_dir_));
+ if (overlay_schema_file_ != nullptr) {
+ overlay_schema_file_->SetSwappedFilepath(
+ MakeOverlaySchemaFilename(base_dir_));
+ }
return libtextclassifier3::Status::OK;
}
@@ -515,6 +776,19 @@ libtextclassifier3::StatusOr<SchemaTypeId> SchemaStore::GetSchemaTypeId(
return schema_type_mapper_->Get(schema_type);
}
+// Looks up the id of `schema_type` and returns a pointer to the cached set
+// holding that id together with the ids of all of its subtypes. Errors from
+// GetSchemaTypeId() are propagated unchanged.
+libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*>
+SchemaStore::GetSchemaTypeIdsWithChildren(std::string_view schema_type) const {
+  ICING_ASSIGN_OR_RETURN(SchemaTypeId parent_type_id,
+                         GetSchemaTypeId(schema_type));
+
+  auto subtype_entry = schema_subtype_id_map_.find(parent_type_id);
+  if (subtype_entry != schema_subtype_id_map_.end()) {
+    return &subtype_entry->second;
+  }
+
+  // A known type id without a subtype entry should never happen, unless there
+  // is an inconsistency or IO error.
+  return absl_ports::InternalError(absl_ports::StrCat(
+      "Schema type '", schema_type, "' is not found in the subtype map."));
+}
+
libtextclassifier3::StatusOr<const SectionMetadata*>
SchemaStore::GetSectionMetadata(SchemaTypeId schema_type_id,
SectionId section_id) const {
@@ -551,9 +825,8 @@ libtextclassifier3::Status SchemaStore::PersistToDisk() {
ICING_RETURN_IF_ERROR(schema_type_mapper_->PersistToDisk());
// Write the header
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
-
- return libtextclassifier3::Status::OK;
+ header_->set_checksum(checksum.Get());
+ return header_->Write(filesystem_, MakeHeaderFilename(base_dir_));
}
SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const {
@@ -589,6 +862,50 @@ SchemaStore::GetSectionMetadata(const std::string& schema_type) const {
return schema_type_manager_->section_manager().GetMetadataList(schema_type);
}
+// Returns true iff `property_path` names a property reachable from the type
+// identified by `schema_type_id`. Every segment but the last must resolve to a
+// DOCUMENT property, whose schema type is then used to resolve the next
+// segment. Unknown type ids, unknown types and unknown properties all yield
+// false.
+bool SchemaStore::IsPropertyDefinedInSchema(
+    SchemaTypeId schema_type_id, const std::string& property_path) const {
+  auto type_name_entry = reverse_schema_type_mapper_.find(schema_type_id);
+  if (type_name_entry == reverse_schema_type_mapper_.end()) {
+    return false;
+  }
+  const std::string* type_name = &type_name_entry->second;
+
+  const std::vector<std::string_view> path_segments =
+      property_util::SplitPropertyPathExpr(property_path);
+  const size_t num_segments = path_segments.size();
+  for (size_t depth = 0; depth < num_segments; ++depth) {
+    auto config_entry = type_config_map_.find(*type_name);
+    if (config_entry == type_config_map_.end()) {
+      return false;
+    }
+
+    // Linear search for the property named by the current path segment.
+    const PropertyConfigProto* matched_property = nullptr;
+    for (const PropertyConfigProto& candidate :
+         config_entry->second.properties()) {
+      if (candidate.property_name() == path_segments[depth]) {
+        matched_property = &candidate;
+        break;
+      }
+    }
+    if (matched_property == nullptr) {
+      return false;
+    }
+
+    const bool is_last_segment = (depth + 1 == num_segments);
+    if (is_last_segment) {
+      // The final segment resolved to a property: the path is defined.
+      return true;
+    }
+    if (matched_property->data_type() !=
+        PropertyConfigProto::DataType::DOCUMENT) {
+      // More segments remain but this property is not a document, so there
+      // is nothing to descend into and the path cannot exist.
+      return false;
+    }
+    type_name = &matched_property->schema_type();
+  }
+
+  // Only reached when the path produced no segments at all.
+  return false;
+}
+
libtextclassifier3::StatusOr<SchemaDebugInfoProto> SchemaStore::GetDebugInfo()
const {
SchemaDebugInfoProto debug_info;
@@ -601,5 +918,55 @@ libtextclassifier3::StatusOr<SchemaDebugInfoProto> SchemaStore::GetDebugInfo()
return debug_info;
}
+// Expands each mask to the named type plus all of its subtypes, merging the
+// paths of masks that end up on the same type. A wildcard mask is kept as a
+// single entry stored under kInvalidSchemaTypeId; masks whose type cannot be
+// resolved are dropped. The order of the returned vector is unspecified.
+std::vector<SchemaStore::ExpandedTypePropertyMask>
+SchemaStore::ExpandTypePropertyMasks(
+    const google::protobuf::RepeatedPtrField<TypePropertyMask>& type_property_masks)
+    const {
+  std::unordered_map<SchemaTypeId, ExpandedTypePropertyMask> masks_by_type_id;
+  for (const TypePropertyMask& mask : type_property_masks) {
+    if (mask.schema_type() == kSchemaTypeWildcard) {
+      ExpandedTypePropertyMask wildcard_mask{mask.schema_type(),
+                                             /*paths=*/{}};
+      wildcard_mask.paths.insert(mask.paths().begin(), mask.paths().end());
+      // insert() leaves an already-recorded wildcard entry untouched.
+      masks_by_type_id.insert({kInvalidSchemaTypeId, std::move(wildcard_mask)});
+      continue;
+    }
+
+    auto subtype_ids_or = GetSchemaTypeIdsWithChildren(mask.schema_type());
+    if (!subtype_ids_or.ok()) {
+      // If we can't find the SchemaTypeIds, just throw the mask away.
+      continue;
+    }
+    const std::unordered_set<SchemaTypeId>* subtype_ids =
+        subtype_ids_or.ValueOrDie();
+    for (SchemaTypeId subtype_id : *subtype_ids) {
+      auto type_name_entry = reverse_schema_type_mapper_.find(subtype_id);
+      if (type_name_entry == reverse_schema_type_mapper_.end()) {
+        // This should never happen, unless there is an inconsistency or IO
+        // error.
+        ICING_LOG(ERROR) << "Got unknown schema type id: " << subtype_id;
+        continue;
+      }
+
+      auto mask_entry = masks_by_type_id.find(subtype_id);
+      if (mask_entry == masks_by_type_id.end()) {
+        ExpandedTypePropertyMask fresh_mask{type_name_entry->second,
+                                            /*paths=*/{}};
+        mask_entry =
+            masks_by_type_id.insert({subtype_id, std::move(fresh_mask)}).first;
+      }
+      mask_entry->second.paths.insert(mask.paths().begin(),
+                                      mask.paths().end());
+    }
+  }
+
+  std::vector<ExpandedTypePropertyMask> expanded_masks;
+  expanded_masks.reserve(masks_by_type_id.size());
+  for (auto& id_and_mask : masks_by_type_id) {
+    expanded_masks.push_back(std::move(id_and_mask.second));
+  }
+  return expanded_masks;
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index 5ad714e..73d7848 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -16,20 +16,26 @@
#define ICING_SCHEMA_SCHEMA_STORE_H_
#include <cstdint>
+#include <cstring>
+#include <limits>
#include <memory>
#include <string>
#include <string_view>
+#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
+#include "icing/file/version-util.h"
#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
#include "icing/proto/storage.pb.h"
#include "icing/schema/joinable-property.h"
#include "icing/schema/schema-type-manager.h"
@@ -50,9 +56,7 @@ namespace lib {
// should always call Get* from the SchemaStore.
class SchemaStore {
public:
- struct Header {
- static constexpr int32_t kMagic = 0x72650d0a;
-
+ struct LegacyHeader {
// Holds the magic as a quick sanity check against file corruption.
int32_t magic;
@@ -60,6 +64,63 @@ class SchemaStore {
uint32_t checksum;
};
+ // Fixed-size header of the SchemaStore. Besides the magic and the combined
+ // checksum it records whether an overlay schema was created and the minimum
+ // version the overlay is compatible with. Trailing padding reserves room for
+ // future fields; the static_assert below pins sizeof(Header) to 1024.
+ class Header {
+ public:
+ static constexpr int32_t kMagic = 0x72650d0a;
+
+ // Builds a header with the magic set to kMagic, a zero checksum, no overlay
+ // (overlay_created is false and min_overlay_version_compatibility is the
+ // largest int32_t value) and zero-filled padding.
+ explicit Header()
+ : magic_(kMagic),
+ checksum_(0),
+ overlay_created_(false),
+ min_overlay_version_compatibility_(
+ std::numeric_limits<int32_t>::max()) {
+ memset(padding, 0, kPaddingSize);
+ }
+
+ // RETURNS:
+ // - On success, a valid Header instance
+ // - NOT_FOUND if header file doesn't exist
+ // - INTERNAL if unable to read header
+ static libtextclassifier3::StatusOr<Header> Read(
+ const Filesystem* filesystem, const std::string& path);
+
+ // Writes this header to `path` via `filesystem`.
+ // NOTE(review): the possible error codes are not documented here; confirm
+ // against the definition in schema-store.cc.
+ libtextclassifier3::Status Write(const Filesystem* filesystem,
+ const std::string& path);
+
+ int32_t magic() const { return magic_; }
+
+ uint32_t checksum() const { return checksum_; }
+ void set_checksum(uint32_t checksum) { checksum_ = checksum; }
+
+ bool overlay_created() const { return overlay_created_; }
+
+ int32_t min_overlay_version_compatibility() const {
+ return min_overlay_version_compatibility_;
+ }
+
+ // Sets both overlay fields together.
+ void SetOverlayInfo(bool overlay_created,
+ int32_t min_overlay_version_compatibility) {
+ overlay_created_ = overlay_created;
+ min_overlay_version_compatibility_ = min_overlay_version_compatibility;
+ }
+
+ private:
+ // Holds the magic as a quick sanity check against file corruption.
+ int32_t magic_;
+
+ // Checksum of the SchemaStore's sub-component's checksums.
+ uint32_t checksum_;
+
+ // True once an overlay schema has been created for this store.
+ bool overlay_created_;
+
+ // Minimum version the overlay schema is compatible with.
+ int32_t min_overlay_version_compatibility_;
+
+ // NOTE(review): 4 + 4 + 1 + 4 + 1008 bytes of members only add up to 1024
+ // with 3 bytes of alignment padding after overlay_created_; the
+ // static_assert below enforces the total.
+ static constexpr int kPaddingSize = 1008;
+ // Padding exists just to reserve space for additional values.
+ uint8_t padding[kPaddingSize];
+ };
+ static_assert(sizeof(Header) == 1024);
+
// Holds information on what may have been affected by the new schema. This is
// generally data that other classes may depend on from the SchemaStore,
// so that we can know if we should go update those classes as well.
@@ -121,6 +182,13 @@ class SchemaStore {
std::unordered_set<std::string> schema_types_join_incompatible_by_name;
};
+ // One entry of the result produced by ExpandTypePropertyMasks.
+ struct ExpandedTypePropertyMask {
+ // Name of the schema type the paths apply to, or kSchemaTypeWildcard for
+ // the wildcard entry.
+ std::string schema_type;
+ // Property paths collected for this type, without duplicates.
+ std::unordered_set<std::string> paths;
+ };
+
+ // schema_type value that ExpandTypePropertyMasks treats as a wildcard entry
+ // rather than as the name of a schema type.
+ static constexpr std::string_view kSchemaTypeWildcard = "*";
+
// Factory function to create a SchemaStore which does not take ownership
// of any input components, and all pointers must refer to valid objects that
// outlive the created SchemaStore instance. The base_dir must already exist.
@@ -137,6 +205,23 @@ class SchemaStore {
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, InitializeStatsProto* initialize_stats = nullptr);
+ // Migrates schema files (backup v.s. new schema) according to version state
+ // change.
+ //
+ // Returns:
+ // OK on success or nothing to migrate
+ static libtextclassifier3::Status MigrateSchema(
+ const Filesystem* filesystem, const std::string& base_dir,
+ version_util::StateChange version_state_change, int32_t new_version);
+
+ // Discards all derived data in the schema store.
+ //
+ // Returns:
+ // OK on success or nothing to discard
+ // INTERNAL_ERROR on any I/O errors
+ static libtextclassifier3::Status DiscardDerivedFiles(
+ const Filesystem* filesystem, const std::string& base_dir);
+
SchemaStore(SchemaStore&&) = default;
SchemaStore& operator=(SchemaStore&&) = default;
@@ -168,10 +253,12 @@ class SchemaStore {
// INTERNAL_ERROR on any IO errors
libtextclassifier3::StatusOr<const SetSchemaResult> SetSchema(
const SchemaProto& new_schema,
- bool ignore_errors_and_delete_documents = false);
+ bool ignore_errors_and_delete_documents,
+ bool allow_circular_schema_definitions);
libtextclassifier3::StatusOr<const SetSchemaResult> SetSchema(
SchemaProto&& new_schema,
- bool ignore_errors_and_delete_documents = false);
+ bool ignore_errors_and_delete_documents,
+ bool allow_circular_schema_definitions);
// Get the SchemaTypeConfigProto of schema_type name.
//
@@ -193,6 +280,17 @@ class SchemaStore {
libtextclassifier3::StatusOr<SchemaTypeId> GetSchemaTypeId(
std::string_view schema_type) const;
+ // Similar to GetSchemaTypeId but will return a set of SchemaTypeId to also
+ // include child types.
+ //
+ // Returns:
+ // A set of SchemaTypeId on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
+ // NOT_FOUND_ERROR if we don't know about the schema type
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*>
+ GetSchemaTypeIdsWithChildren(std::string_view schema_type) const;
+
// Returns the SectionMetadata associated with the SectionId that's in the
// SchemaTypeId.
//
@@ -203,6 +301,11 @@ class SchemaStore {
libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata(
SchemaTypeId schema_type_id, SectionId section_id) const;
+ // Returns true if a property is defined in the said schema, regardless of
+ // whether it is indexed or not.
+ bool IsPropertyDefinedInSchema(SchemaTypeId schema_type_id,
+ const std::string& property) const;
+
// Extracts all sections of different types from the given document and group
// them by type.
// - Each Section vector is sorted by section Id in ascending order. The
@@ -282,6 +385,23 @@ class SchemaStore {
// INTERNAL_ERROR on IO errors, crc compute error
libtextclassifier3::StatusOr<SchemaDebugInfoProto> GetDebugInfo() const;
+ // Expands the provided type_property_masks into a vector of
+ // ExpandedTypePropertyMasks to account for polymorphism. If both a parent
+ // type and one of its child types appear in the masks, the parent type's
+ // paths will be merged into the child's.
+ //
+ // For example, assume that we have two schema types A and B, and we have
+ // - A is the parent type of B
+ // - Paths of A: {P1, P2}
+ // - Paths of B: {P3}
+ //
+ // Then, we will have the following in the result.
+ // - Expanded paths of A: {P1, P2}
+ // - Expanded paths of B: {P1, P2, P3}
+ //
+ // A mask whose schema_type is kSchemaTypeWildcard is kept as a single entry
+ // and is not expanded. Masks naming a type that cannot be resolved are
+ // dropped. The order of the returned vector is unspecified.
+ std::vector<ExpandedTypePropertyMask> ExpandTypePropertyMasks(
+ const google::protobuf::RepeatedPtrField<TypePropertyMask>& type_property_masks)
+ const;
+
private:
// Factory function to create a SchemaStore and set its schema. The created
// instance does not take ownership of any input components and all pointers
@@ -302,6 +422,15 @@ class SchemaStore {
explicit SchemaStore(const Filesystem* filesystem, std::string base_dir,
const Clock* clock);
+ // Deletes the overlay schema and ensures that the Header is correctly set.
+ //
+ // RETURNS:
+ // OK on success
+ // INTERNAL_ERROR on any IO errors
+ static libtextclassifier3::Status DiscardOverlaySchema(
+ const Filesystem* filesystem, const std::string& base_dir,
+ Header& header);
+
// Verifies that there is no error retrieving a previously set schema. Then
// initializes like normal.
//
@@ -325,7 +454,7 @@ class SchemaStore {
// OK on success
// INTERNAL_ERROR on IO error
libtextclassifier3::Status InitializeInternal(
- InitializeStatsProto* initialize_stats);
+ bool create_overlay_if_necessary, InitializeStatsProto* initialize_stats);
// Creates sub-components and verifies the integrity of each sub-component.
//
@@ -340,11 +469,16 @@ class SchemaStore {
// OK on success
// NOT_FOUND_ERROR if a schema proto has not been set
// INTERNAL_ERROR on any IO errors
- libtextclassifier3::Status RegenerateDerivedFiles();
+ libtextclassifier3::Status RegenerateDerivedFiles(
+ bool create_overlay_if_necessary);
- // Checks if the header exists already. This does not create the header file
- // if it doesn't exist.
- bool HeaderExists();
+ // Builds reverse_schema_type_mapper_, type_config_map_,
+ // schema_subtype_id_map_, and schema_type_manager_ from the current schema.
+ //
+ // Returns:
+ // OK on success
+ // NOT_FOUND_ERROR if a schema proto has not been set
+ // INTERNAL_ERROR on any IO errors
+ libtextclassifier3::Status BuildInMemoryCache();
// Update and replace the header file. Creates the header file if it doesn't
// exist.
@@ -377,6 +511,15 @@ class SchemaStore {
: absl_ports::FailedPreconditionError("Schema not set yet.");
}
+ // Correctly loads the Header, schema_file_ and (if present) the
+ // overlay_schema_file_.
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL if an IO error is encountered when reading the Header or
+ // schemas.
+ // Or an invalid schema configuration is present.
+ libtextclassifier3::Status LoadSchema();
+
const Filesystem* filesystem_;
std::string base_dir_;
const Clock* clock_;
@@ -389,17 +532,37 @@ class SchemaStore {
// Cached schema
std::unique_ptr<FileBackedProto<SchemaProto>> schema_file_;
+ // This schema holds the definition of any schema types that are not
+ // compatible with older versions of Icing code.
+ std::unique_ptr<FileBackedProto<SchemaProto>> overlay_schema_file_;
+
+ // Maps schema types to a densely-assigned unique id.
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
+
+ // Maps schema type ids to the corresponding schema type. This is an inverse
+ // map of schema_type_mapper_.
+ std::unordered_map<SchemaTypeId, std::string> reverse_schema_type_mapper_;
+
// A hash map of (type config name -> type config), allows faster lookup of
// type config in schema. The O(1) type config access makes schema-related and
// section-related operations faster.
SchemaUtil::TypeConfigMap type_config_map_;
- // Maps schema types to a densely-assigned unique id.
- std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
+ // Maps from each type id to all of its subtype ids.
+ // T2 is a subtype of T1, if and only if one of the following conditions is
+ // met:
+ // - T2 is T1
+ // - T2 extends T1
+ // - There exists a type U, such that T2 is a subtype of U, and U is a subtype
+ // of T1
+ std::unordered_map<SchemaTypeId, std::unordered_set<SchemaTypeId>>
+ schema_subtype_id_map_;
// Manager of section (indexable property) and joinable property related
// metadata for all Schemas.
std::unique_ptr<const SchemaTypeManager> schema_type_manager_;
+
+ std::unique_ptr<Header> header_;
};
} // namespace lib
diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc
index 4e2724f..3298b75 100644
--- a/icing/schema/schema-store_test.cc
+++ b/icing/schema/schema-store_test.cc
@@ -25,9 +25,11 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/file/mock-filesystem.h"
+#include "icing/file/version-util.h"
#include "icing/portable/equals-proto.h"
#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
@@ -56,6 +58,7 @@ using ::testing::Not;
using ::testing::Pointee;
using ::testing::Return;
using ::testing::SizeIs;
+using ::testing::UnorderedElementsAre;
constexpr int64_t kDefaultTimestamp = 12345678;
@@ -114,7 +117,7 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveConstructible) {
// Create an instance of SchemaStore.
SchemaProto schema =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty(
+ .AddType(SchemaTypeConfigBuilder().SetType("type_a").AddProperty(
PropertyConfigBuilder()
.SetName("prop1")
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
@@ -125,7 +128,9 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveConstructible) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 expected_checksum,
schema_store->ComputeChecksum());
@@ -138,7 +143,7 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveConstructible) {
SectionMetadata expected_metadata(/*id_in=*/0, TYPE_STRING, TOKENIZER_PLAIN,
TERM_MATCH_EXACT, NUMERIC_MATCH_UNKNOWN,
"prop1");
- EXPECT_THAT(move_constructed_schema_store.GetSectionMetadata("TypeA"),
+ EXPECT_THAT(move_constructed_schema_store.GetSectionMetadata("type_a"),
IsOkAndHolds(Pointee(ElementsAre(expected_metadata))));
}
@@ -146,7 +151,7 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) {
// Create an instance of SchemaStore.
SchemaProto schema1 =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty(
+ .AddType(SchemaTypeConfigBuilder().SetType("type_a").AddProperty(
PropertyConfigBuilder()
.SetName("prop1")
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
@@ -157,14 +162,16 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema1));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema1, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 expected_checksum,
schema_store->ComputeChecksum());
// Construct another instance of SchemaStore
SchemaProto schema2 =
SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("TypeB").AddProperty(
+ .AddType(SchemaTypeConfigBuilder().SetType("type_b").AddProperty(
PropertyConfigBuilder()
.SetName("prop2")
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
@@ -174,7 +181,9 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> move_assigned_schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema2));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema2, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Move assign the first instance into the second one.
*move_assigned_schema_store = std::move(*schema_store);
@@ -185,7 +194,7 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) {
SectionMetadata expected_metadata(/*id_in=*/0, TYPE_STRING, TOKENIZER_PLAIN,
TERM_MATCH_EXACT, NUMERIC_MATCH_UNKNOWN,
"prop1");
- EXPECT_THAT(move_assigned_schema_store->GetSectionMetadata("TypeA"),
+ EXPECT_THAT(move_assigned_schema_store->GetSectionMetadata("type_a"),
IsOkAndHolds(Pointee(ElementsAre(expected_metadata))));
}
@@ -199,7 +208,9 @@ TEST_F(SchemaStoreTest, CorruptSchemaError) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -237,7 +248,9 @@ TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -254,9 +267,56 @@ TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) {
absl_ports::StrCat(schema_store_dir_, "/schema_type_mapper");
filesystem_.DeleteDirectoryRecursively(schema_type_mapper_dir.c_str());
+ InitializeStatsProto initialize_stats;
+ fake_clock_.SetTimerElapsedMilliseconds(123);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_,
+ &initialize_stats));
+ EXPECT_THAT(initialize_stats.schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_stats.schema_store_recovery_latency_ms(), Eq(123));
+
+ // Everything looks fine, ground truth and derived data
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+ EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+}
+
+TEST_F(SchemaStoreTest, RecoverDiscardDerivedFilesOk) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ // Set it for the first time
+ SchemaStore::SetSchemaResult result;
+ result.success = true;
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+
+ EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+ }
+
+ ICING_ASSERT_OK(
+ SchemaStore::DiscardDerivedFiles(&filesystem_, schema_store_dir_));
+
+ InitializeStatsProto initialize_stats;
+ fake_clock_.SetTimerElapsedMilliseconds(123);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_,
+ &initialize_stats));
+ EXPECT_THAT(initialize_stats.schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_stats.schema_store_recovery_latency_ms(), Eq(123));
// Everything looks fine, ground truth and derived data
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -275,7 +335,9 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -289,7 +351,7 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) {
// of derived files from ground truth.
const std::string header_file =
absl_ports::StrCat(schema_store_dir_, "/schema_store_header");
- SchemaStore::Header header;
+ SchemaStore::LegacyHeader header;
header.magic = SchemaStore::Header::kMagic;
header.checksum = 10; // Arbitrary garbage checksum
filesystem_.DeleteFile(header_file.c_str());
@@ -348,7 +410,9 @@ TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
schema_store.reset();
@@ -374,7 +438,9 @@ TEST_F(SchemaStoreTest, MultipleCreateOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
// Verify that our in-memory structures are ok
@@ -419,7 +485,9 @@ TEST_F(SchemaStoreTest, SetNewSchemaOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -435,7 +503,9 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -444,7 +514,9 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) {
// And one more for fun
result = SchemaStore::SetSchemaResult();
result.success = true;
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema_));
@@ -459,7 +531,9 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -473,7 +547,9 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) {
result.success = false;
result.schema_types_deleted_by_name.emplace("email");
result.schema_types_deleted_by_id.emplace(0);
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
}
@@ -490,7 +566,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert("email");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -505,7 +583,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) {
result = SchemaStore::SetSchemaResult();
result.success = true;
result.schema_types_new_by_name.insert("new_type");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
@@ -527,7 +607,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) {
result.success = true;
result.schema_types_new_by_name.insert("email");
result.schema_types_new_by_name.insert("message");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -552,7 +634,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) {
old_email_schema_type_id);
// Can't set the incompatible schema
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(incompatible_result)));
SchemaStore::SetSchemaResult force_result;
@@ -563,7 +647,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) {
// Force set the incompatible schema
EXPECT_THAT(schema_store->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true),
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(force_result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
@@ -585,7 +670,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) {
result.success = true;
result.schema_types_new_by_name.insert("email");
result.schema_types_new_by_name.insert("message");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -606,7 +693,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) {
1); // Old SchemaTypeId of "message"
// Set the compatible schema
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
@@ -631,7 +720,9 @@ TEST_F(SchemaStoreTest, IndexedPropertyChangeRequiresReindexingOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert("email");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -650,7 +741,9 @@ TEST_F(SchemaStoreTest, IndexedPropertyChangeRequiresReindexingOk) {
result = SchemaStore::SetSchemaResult();
result.success = true;
result.schema_types_index_incompatible_by_name.insert("email");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
@@ -698,8 +791,11 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) {
result.success = true;
result.schema_types_new_by_name.insert("email");
result.schema_types_new_by_name.insert("person");
- EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema),
- IsOkAndHolds(EqualsSetSchemaResult(result)));
+ EXPECT_THAT(
+ schema_store->SetSchema(no_nested_index_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema));
@@ -709,8 +805,11 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) {
result = SchemaStore::SetSchemaResult();
result.success = true;
result.schema_types_index_incompatible_by_name.insert("person");
- EXPECT_THAT(schema_store->SetSchema(nested_index_schema),
- IsOkAndHolds(EqualsSetSchemaResult(result)));
+ EXPECT_THAT(
+ schema_store->SetSchema(nested_index_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(nested_index_schema));
@@ -719,8 +818,11 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) {
result = SchemaStore::SetSchemaResult();
result.success = true;
result.schema_types_index_incompatible_by_name.insert("person");
- EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema),
- IsOkAndHolds(EqualsSetSchemaResult(result)));
+ EXPECT_THAT(
+ schema_store->SetSchema(no_nested_index_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema));
}
@@ -744,7 +846,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
result.schema_types_new_by_name.insert("email");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -770,7 +874,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
old_email_schema_type_id);
// Can't set the incompatible schema
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(incompatible_result)));
SchemaStore::SetSchemaResult force_result;
@@ -781,7 +887,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
// Force set the incompatible schema
EXPECT_THAT(schema_store->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true),
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(force_result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
@@ -803,7 +910,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleNestedTypesOk) {
.SetCardinality(CARDINALITY_REPEATED));
SchemaProto old_schema =
SchemaBuilder().AddType(contact_point_repeated_label).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(old_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ old_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_contact_point_type_id,
schema_store->GetSchemaTypeId("ContactPoint"));
@@ -839,7 +948,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleNestedTypesOk) {
expected_result.schema_types_new_by_name.insert("Person");
EXPECT_THAT(
schema_store->SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/false),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -850,7 +960,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleNestedTypesOk) {
expected_result.success = true;
EXPECT_THAT(
schema_store->SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/true),
+ /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(new_schema));
@@ -873,7 +984,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithIndexIncompatibleNestedTypesOk) {
.SetCardinality(CARDINALITY_REPEATED));
SchemaProto old_schema =
SchemaBuilder().AddType(contact_point_prefix_label).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(old_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ old_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// 2. Create a type that references the ContactPoint type and make a index
// backwards incompatible change to ContactPoint
@@ -905,7 +1018,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithIndexIncompatibleNestedTypesOk) {
expected_result.schema_types_new_by_name.insert("Person");
EXPECT_THAT(
schema_store->SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/false),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -928,7 +1042,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithCompatibleNestedTypesOk) {
.SetCardinality(CARDINALITY_OPTIONAL));
SchemaProto old_schema =
SchemaBuilder().AddType(contact_point_optional_label).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(old_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ old_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// 2. Create a type that references the ContactPoint type and make a backwards
// compatible change to ContactPoint
@@ -960,7 +1076,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithCompatibleNestedTypesOk) {
"ContactPoint");
expected_result.schema_types_new_by_name.insert("Person");
EXPECT_THAT(schema_store->SetSchema(
- new_schema, /*ignore_errors_and_delete_documents=*/false),
+ new_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(expected_result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
schema_store->GetSchema());
@@ -988,7 +1105,9 @@ TEST_F(SchemaStoreTest, GetSchemaTypeId) {
result.success = true;
result.schema_types_new_by_name.insert(first_type);
result.schema_types_new_by_name.insert(second_type);
- EXPECT_THAT(schema_store->SetSchema(schema_),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema_, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
EXPECT_THAT(schema_store->GetSchemaTypeId(first_type), IsOkAndHolds(0));
@@ -1012,7 +1131,9 @@ TEST_F(SchemaStoreTest, ComputeChecksumSameBetweenCalls) {
SchemaProto foo_schema =
SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ foo_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum());
@@ -1028,7 +1149,9 @@ TEST_F(SchemaStoreTest, ComputeChecksumSameAcrossInstances) {
SchemaProto foo_schema =
SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ foo_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum());
@@ -1049,7 +1172,9 @@ TEST_F(SchemaStoreTest, ComputeChecksumChangesOnModification) {
SchemaProto foo_schema =
SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ foo_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum());
@@ -1060,7 +1185,9 @@ TEST_F(SchemaStoreTest, ComputeChecksumChangesOnModification) {
.AddType(SchemaTypeConfigBuilder().SetType("bar"))
.Build();
- ICING_EXPECT_OK(schema_store->SetSchema(foo_bar_schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ foo_bar_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(Not(Eq(checksum))));
}
@@ -1082,7 +1209,9 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) {
SchemaProto schema =
SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Persisting shouldn't change anything
ICING_EXPECT_OK(schema_store->PersistToDisk());
@@ -1095,7 +1224,9 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) {
schema = SchemaBuilder(schema)
.AddType(SchemaTypeConfigBuilder().SetType("bar"))
.Build();
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Should also persist on destruction
schema_store.reset();
@@ -1138,7 +1269,9 @@ TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) {
result.success = true;
result.schema_types_new_by_name.insert("email");
result.schema_types_new_by_name.insert("fullSectionsType");
- EXPECT_THAT(schema_store->SetSchema(schema),
+ EXPECT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(result)));
SchemaStoreStorageInfoProto storage_info = schema_store->GetStorageInfo();
@@ -1155,7 +1288,9 @@ TEST_F(SchemaStoreTest, GetDebugInfo) {
// Set schema
ASSERT_THAT(
- schema_store->SetSchema(schema_),
+ schema_store->SetSchema(schema_,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
IsOkAndHolds(EqualsSetSchemaResult(SchemaStore::SetSchemaResult{
.success = true,
.schema_types_new_by_name = {schema_.types(0).schema_type()}})));
@@ -1191,7 +1326,9 @@ TEST_F(SchemaStoreTest, InitializeRegenerateDerivedFilesFailure) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("Type"))
.Build();
- ICING_ASSERT_OK(schema_store->SetSchema(std::move(schema)));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
auto mock_filesystem = std::make_unique<MockFilesystem>();
@@ -1226,7 +1363,9 @@ TEST_F(SchemaStoreTest, SetSchemaRegenerateDerivedFilesFailure) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
SchemaProto schema = SchemaBuilder().AddType(type).Build();
- ICING_ASSERT_OK(schema_store->SetSchema(std::move(schema)));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ std::move(schema), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
{
@@ -1244,8 +1383,11 @@ TEST_F(SchemaStoreTest, SetSchemaRegenerateDerivedFilesFailure) {
.AddType(type)
.AddType(SchemaTypeConfigBuilder().SetType("Type2"))
.Build();
- EXPECT_THAT(schema_store->SetSchema(std::move(schema)),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ EXPECT_THAT(
+ schema_store->SetSchema(std::move(schema),
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
DocumentProto document =
DocumentBuilder()
.SetSchema("Type")
@@ -1273,6 +1415,1648 @@ TEST_F(SchemaStoreTest, SetSchemaRegenerateDerivedFilesFailure) {
}
}
+// Verifies that IsPropertyDefinedInSchema returns true for each property
+// declared on the "email" type and false for a name that type does not
+// declare.
+TEST_F(SchemaStoreTest, CanCheckForPropertiesDefinedInSchema) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+  // Don't use schema_ defined in the test suite, as we want to make sure that
+  // the test is written correctly without referring to what the suite has
+  // defined.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(
+              SchemaTypeConfigBuilder()
+                  .SetType("email")
+                  .AddProperty(
+                      // Add an indexed property so we generate
+                      // section metadata on it
+                      PropertyConfigBuilder()
+                          .SetName("subject")
+                          .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+                          .SetCardinality(CARDINALITY_OPTIONAL))
+                  .AddProperty(PropertyConfigBuilder()
+                                   .SetName("timestamp")
+                                   .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+                                   .SetCardinality(CARDINALITY_OPTIONAL)))
+          .Build();
+
+  // Set it for the first time. The expected result names the new type with a
+  // literal rather than reading it from schema_, so the expectation is tied to
+  // the schema built above and not to the suite's fixture.
+  SchemaStore::SetSchemaResult result;
+  result.success = true;
+  result.schema_types_new_by_name.insert("email");
+  EXPECT_THAT(schema_store->SetSchema(
+                  schema, /*ignore_errors_and_delete_documents=*/false,
+                  /*allow_circular_schema_definitions=*/false),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId schema_id,
+                             schema_store->GetSchemaTypeId("email"));
+  // Both declared properties must be found; an undeclared name must not.
+  EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(schema_id, "subject"));
+  EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(schema_id, "timestamp"));
+  EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(schema_id, "foobar"));
+}
+
+// Checks GetSchemaTypeIdsWithChildren on a tree-shaped inheritance hierarchy:
+// for every type the result must hold the type's own id plus the ids of all
+// of its transitive descendants.
+TEST_F(SchemaStoreTest, GetSchemaTypeIdsWithChildren) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+  // Inheritance edges (child -> parent) of the schema under test:
+  //   B -> A, E -> A
+  //   C -> B, D -> B
+  //   F -> D
+  // A is the root; C, E and F have no children.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder().SetType("A").Build())
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("B")
+                       .AddParentType("A")
+                       .Build())
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("C")
+                       .AddParentType("B")
+                       .Build())
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("D")
+                       .AddParentType("B")
+                       .Build())
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("E")
+                       .AddParentType("A")
+                       .Build())
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("F")
+                       .AddParentType("D")
+                       .Build())
+          .Build();
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/false));
+
+  // Resolve the id assigned to each type by name.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId id_a,
+                             schema_store->GetSchemaTypeId("A"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId id_b,
+                             schema_store->GetSchemaTypeId("B"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId id_c,
+                             schema_store->GetSchemaTypeId("C"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId id_d,
+                             schema_store->GetSchemaTypeId("D"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId id_e,
+                             schema_store->GetSchemaTypeId("E"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId id_f,
+                             schema_store->GetSchemaTypeId("F"));
+
+  // Root: every type in the schema.
+  EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("A"),
+              IsOkAndHolds(Pointee(UnorderedElementsAre(id_a, id_b, id_c, id_d,
+                                                        id_e, id_f))));
+  // B: itself, its children C and D, and D's child F.
+  EXPECT_THAT(
+      schema_store->GetSchemaTypeIdsWithChildren("B"),
+      IsOkAndHolds(Pointee(UnorderedElementsAre(id_b, id_c, id_d, id_f))));
+  // C has no children.
+  EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("C"),
+              IsOkAndHolds(Pointee(UnorderedElementsAre(id_c))));
+  // D: itself and F.
+  EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("D"),
+              IsOkAndHolds(Pointee(UnorderedElementsAre(id_d, id_f))));
+  // E has no children.
+  EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("E"),
+              IsOkAndHolds(Pointee(UnorderedElementsAre(id_e))));
+  // F has no children.
+  EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("F"),
+              IsOkAndHolds(Pointee(UnorderedElementsAre(id_f))));
+}
+
+// Checks GetSchemaTypeIdsWithChildren when types have more than one parent
+// (diamond-shaped inheritance): each result must hold the type's own id plus
+// the ids of all transitive descendants, with each descendant listed once.
+TEST_F(SchemaStoreTest, DiamondGetSchemaTypeIdsWithChildren) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+  // Inheritance edges (child -> parent) of the schema under test:
+  //   B -> A, E -> A
+  //   C -> B
+  //   D -> B, D -> E
+  //   F -> C, F -> D
+  // D and F each have two parents, so they are reachable via several paths.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder().SetType("A").Build())
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("B")
+                       .AddParentType("A")
+                       .Build())
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("C")
+                       .AddParentType("B")
+                       .Build())
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("D")
+                       .AddParentType("B")
+                       .AddParentType("E")
+                       .Build())
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("E")
+                       .AddParentType("A")
+                       .Build())
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("F")
+                       .AddParentType("C")
+                       .AddParentType("D")
+                       .Build())
+          .Build();
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/false));
+
+  // Resolve the id assigned to each type by name.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId id_a,
+                             schema_store->GetSchemaTypeId("A"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId id_b,
+                             schema_store->GetSchemaTypeId("B"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId id_c,
+                             schema_store->GetSchemaTypeId("C"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId id_d,
+                             schema_store->GetSchemaTypeId("D"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId id_e,
+                             schema_store->GetSchemaTypeId("E"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId id_f,
+                             schema_store->GetSchemaTypeId("F"));
+
+  // Root: every type in the schema.
+  EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("A"),
+              IsOkAndHolds(Pointee(UnorderedElementsAre(id_a, id_b, id_c, id_d,
+                                                        id_e, id_f))));
+  // B: itself, C, D, and F (reachable through both C and D).
+  EXPECT_THAT(
+      schema_store->GetSchemaTypeIdsWithChildren("B"),
+      IsOkAndHolds(Pointee(UnorderedElementsAre(id_b, id_c, id_d, id_f))));
+  // C: itself and F.
+  EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("C"),
+              IsOkAndHolds(Pointee(UnorderedElementsAre(id_c, id_f))));
+  // D: itself and F.
+  EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("D"),
+              IsOkAndHolds(Pointee(UnorderedElementsAre(id_d, id_f))));
+  // E: itself, D, and D's child F.
+  EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("E"),
+              IsOkAndHolds(Pointee(UnorderedElementsAre(id_e, id_d, id_f))));
+  // F has no children.
+  EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("F"),
+              IsOkAndHolds(Pointee(UnorderedElementsAre(id_f))));
+}
+
+// Verifies that IsPropertyDefinedInSchema returns true for every indexed
+// property of the "Email" type: term-indexed strings (one of which is also
+// joinable) and range-indexed int64s, across required and repeated
+// cardinalities.
+TEST_F(SchemaStoreTest, IndexableFieldsAreDefined) {
+  SchemaTypeConfigProto email_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Email")
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("subject")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("senderQualifiedId")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+                               /*propagate_delete=*/true)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("recipients")
+                           .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+                           .SetCardinality(CARDINALITY_REPEATED))
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("recipientIds")
+                           .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+                           .SetCardinality(CARDINALITY_REPEATED))
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("timestamp")
+                           .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .Build();
+
+  SchemaProto schema = SchemaBuilder().AddType(email_type).Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/true));
+
+  // Look the type id up by name instead of hard-coding it, so the test does
+  // not depend on how the store happens to assign ids.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
+                             schema_store->GetSchemaTypeId("Email"));
+
+  // Indexables.
+  EXPECT_TRUE(
+      schema_store->IsPropertyDefinedInSchema(email_schema_type_id, "subject"));
+  EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(email_schema_type_id,
+                                                      "senderQualifiedId"));
+  EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(email_schema_type_id,
+                                                      "recipients"));
+  EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(email_schema_type_id,
+                                                      "recipientIds"));
+  EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(email_schema_type_id,
+                                                      "timestamp"));
+}
+
+// Verifies that IsPropertyDefinedInSchema() reports top-level properties
+// configured as JOINABLE_VALUE_TYPE_QUALIFIED_ID as defined, both with and
+// without a term-match configuration on the string.
+TEST_F(SchemaStoreTest, JoinableFieldsAreDefined) {
+  SchemaTypeConfigProto email_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Email")
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("tagQualifiedId")
+                           .SetDataType(TYPE_STRING)
+                           .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+                                        /*propagate_delete=*/true)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("senderQualifiedId")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+                               /*propagate_delete=*/true)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .Build();
+
+  SchemaProto schema = SchemaBuilder().AddType(email_type).Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/true));
+  // Resolve the id through the store rather than assuming "Email" got id 0.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_type_id,
+                             schema_store->GetSchemaTypeId("Email"));
+
+  // Joinables.
+  constexpr const char* kDefinedPaths[] = {"tagQualifiedId",
+                                           "senderQualifiedId"};
+  for (const char* property_path : kDefinedPaths) {
+    // Stream the path so a failure identifies the offending property.
+    EXPECT_TRUE(
+        schema_store->IsPropertyDefinedInSchema(email_type_id, property_path))
+        << "property path: " << property_path;
+  }
+}
+
+// Verifies that IsPropertyDefinedInSchema() reports top-level properties as
+// defined even when they carry no indexing configuration (TERM_MATCH_UNKNOWN
+// string, bytes, and an int64 set only through SetDataType).
+TEST_F(SchemaStoreTest, NonIndexableFieldsAreDefined) {
+  SchemaTypeConfigProto email_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Email")
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("text")
+                  .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("attachment")
+                           .SetDataType(TYPE_BYTES)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("nonindexableInteger")
+                           .SetDataType(TYPE_INT64)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .Build();
+
+  SchemaProto schema = SchemaBuilder().AddType(email_type).Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/true));
+  // Resolve the id through the store rather than assuming "Email" got id 0.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_type_id,
+                             schema_store->GetSchemaTypeId("Email"));
+
+  // Non-indexables.
+  constexpr const char* kDefinedPaths[] = {"attachment", "nonindexableInteger",
+                                           "text"};
+  for (const char* property_path : kDefinedPaths) {
+    // Stream the path so a failure identifies the offending property.
+    EXPECT_TRUE(
+        schema_store->IsPropertyDefinedInSchema(email_type_id, property_path))
+        << "property path: " << property_path;
+  }
+}
+
+// Verifies that IsPropertyDefinedInSchema() returns false for paths that do
+// not name a property of the type: an unknown name, a sub-path below a
+// non-document property, and a strict prefix of an existing property name.
+TEST_F(SchemaStoreTest, NonExistentFieldsAreUndefined) {
+  SchemaTypeConfigProto email_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Email")
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("subject")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("senderQualifiedId")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+                               /*propagate_delete=*/true)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("timestamp")
+                           .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("nonindexableInteger")
+                           .SetDataType(TYPE_INT64)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .Build();
+
+  SchemaProto schema = SchemaBuilder().AddType(email_type).Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/true));
+  // Resolve the id through the store rather than assuming "Email" got id 0.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_type_id,
+                             schema_store->GetSchemaTypeId("Email"));
+
+  // Non-existents.
+  constexpr const char* kUndefinedPaths[] = {"foobar", "timestamp.foo", "time"};
+  for (const char* property_path : kUndefinedPaths) {
+    // Stream the path so a failure identifies the offending property.
+    EXPECT_FALSE(
+        schema_store->IsPropertyDefinedInSchema(email_type_id, property_path))
+        << "property path: " << property_path;
+  }
+}
+
+// Verifies that IsPropertyDefinedInSchema() reports indexed properties of a
+// nested "Email" document as defined when they are addressed through the
+// "emails" document property of "Conversation".
+TEST_F(SchemaStoreTest, NestedIndexableFieldsAreDefined) {
+  SchemaTypeConfigProto email_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Email")
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("tagQualifiedId")
+                           .SetDataType(TYPE_STRING)
+                           .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+                                        /*propagate_delete=*/true)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("subject")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("text")
+                  .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("timestamp")
+                           .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .Build();
+
+  SchemaTypeConfigProto conversation_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Conversation")
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("emails")
+                           .SetDataTypeDocument(
+                               "Email", /*index_nested_properties=*/true)
+                           .SetCardinality(CARDINALITY_OPTIONAL))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("nestedNonIndexable")
+                  .SetDataTypeDocument("Email",
+                                       /*index_nested_properties=*/false)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .Build();
+  SchemaProto schema =
+      SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/true));
+  // Resolve the id through the store rather than assuming "Conversation" got
+  // id 1.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId conversation_type_id,
+                             schema_store->GetSchemaTypeId("Conversation"));
+
+  // Indexables.
+  constexpr const char* kDefinedPaths[] = {"emails.subject",
+                                           "emails.timestamp"};
+  for (const char* property_path : kDefinedPaths) {
+    // Stream the path so a failure identifies the offending property.
+    EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(conversation_type_id,
+                                                        property_path))
+        << "property path: " << property_path;
+  }
+}
+
+// Verifies that IsPropertyDefinedInSchema() reports the joinable property of
+// a nested "Email" document as defined, whether the enclosing document
+// property sets index_nested_properties to true or to false.
+TEST_F(SchemaStoreTest, NestedJoinableFieldsAreDefined) {
+  SchemaTypeConfigProto email_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Email")
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("tagQualifiedId")
+                           .SetDataType(TYPE_STRING)
+                           .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+                                        /*propagate_delete=*/true)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("subject")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("text")
+                  .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("timestamp")
+                           .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .Build();
+
+  SchemaTypeConfigProto conversation_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Conversation")
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("emails")
+                           .SetDataTypeDocument(
+                               "Email", /*index_nested_properties=*/true)
+                           .SetCardinality(CARDINALITY_OPTIONAL))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("nestedNonIndexable")
+                  .SetDataTypeDocument("Email",
+                                       /*index_nested_properties=*/false)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .Build();
+  SchemaProto schema =
+      SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/true));
+  // Resolve the id through the store rather than assuming "Conversation" got
+  // id 1.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId conversation_type_id,
+                             schema_store->GetSchemaTypeId("Conversation"));
+
+  // Joinables.
+  constexpr const char* kDefinedPaths[] = {"emails.tagQualifiedId",
+                                           "nestedNonIndexable.tagQualifiedId"};
+  for (const char* property_path : kDefinedPaths) {
+    // Stream the path so a failure identifies the offending property.
+    EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(conversation_type_id,
+                                                        property_path))
+        << "property path: " << property_path;
+  }
+}
+
+// Verifies that IsPropertyDefinedInSchema() reports nested "Email" properties
+// as defined when they are not indexed: the TERM_MATCH_UNKNOWN "text"
+// property under "emails", and every checked property under
+// "nestedNonIndexable" (declared with index_nested_properties=false).
+TEST_F(SchemaStoreTest, NestedNonIndexableFieldsAreDefined) {
+  SchemaTypeConfigProto email_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Email")
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("tagQualifiedId")
+                           .SetDataType(TYPE_STRING)
+                           .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+                                        /*propagate_delete=*/true)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("subject")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("text")
+                  .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("timestamp")
+                           .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .Build();
+
+  SchemaTypeConfigProto conversation_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Conversation")
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("emails")
+                           .SetDataTypeDocument(
+                               "Email", /*index_nested_properties=*/true)
+                           .SetCardinality(CARDINALITY_OPTIONAL))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("nestedNonIndexable")
+                  .SetDataTypeDocument("Email",
+                                       /*index_nested_properties=*/false)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .Build();
+  SchemaProto schema =
+      SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/true));
+  // Resolve the id through the store rather than assuming "Conversation" got
+  // id 1.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId conversation_type_id,
+                             schema_store->GetSchemaTypeId("Conversation"));
+
+  // Non-indexables.
+  constexpr const char* kDefinedPaths[] = {
+      "emails.text", "nestedNonIndexable.subject", "nestedNonIndexable.text",
+      "nestedNonIndexable.timestamp"};
+  for (const char* property_path : kDefinedPaths) {
+    // Stream the path so a failure identifies the offending property.
+    EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(conversation_type_id,
+                                                        property_path))
+        << "property path: " << property_path;
+  }
+}
+
+// Verifies that IsPropertyDefinedInSchema() returns false for nested paths
+// whose last segment is not a property of "Email", that descend below a
+// non-document property, or that end in a strict prefix of a property name.
+TEST_F(SchemaStoreTest, NestedNonExistentFieldsAreUndefined) {
+  SchemaTypeConfigProto email_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Email")
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("tagQualifiedId")
+                           .SetDataType(TYPE_STRING)
+                           .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+                                        /*propagate_delete=*/true)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("subject")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("text")
+                  .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("timestamp")
+                           .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .Build();
+
+  SchemaTypeConfigProto conversation_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Conversation")
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("emails")
+                           .SetDataTypeDocument(
+                               "Email", /*index_nested_properties=*/true)
+                           .SetCardinality(CARDINALITY_OPTIONAL))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("nestedNonIndexable")
+                  .SetDataTypeDocument("Email",
+                                       /*index_nested_properties=*/false)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .Build();
+  SchemaProto schema =
+      SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/true));
+  // Resolve the id through the store rather than assuming "Conversation" got
+  // id 1.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId conversation_type_id,
+                             schema_store->GetSchemaTypeId("Conversation"));
+
+  // Non-existents.
+  constexpr const char* kUndefinedPaths[] = {
+      "emails.foobar", "nestedNonIndexable.foobar", "emails.timestamp.foo",
+      "emails.time"};
+  for (const char* property_path : kUndefinedPaths) {
+    // Stream the path so a failure identifies the offending property.
+    EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(conversation_type_id,
+                                                         property_path))
+        << "property path: " << property_path;
+  }
+}
+
+// Verifies that IsPropertyDefinedInSchema() reports the document-typed
+// properties themselves ("emails", "nestedNonIndexable") as defined, not only
+// the leaf properties beneath them.
+TEST_F(SchemaStoreTest, IntermediateDocumentPropertiesAreDefined) {
+  SchemaTypeConfigProto email_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Email")
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("tagQualifiedId")
+                           .SetDataType(TYPE_STRING)
+                           .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID,
+                                        /*propagate_delete=*/true)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("subject")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("text")
+                  .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("timestamp")
+                           .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+                           .SetCardinality(CARDINALITY_REQUIRED))
+          .Build();
+
+  SchemaTypeConfigProto conversation_type =
+      SchemaTypeConfigBuilder()
+          .SetType("Conversation")
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("emails")
+                           .SetDataTypeDocument(
+                               "Email", /*index_nested_properties=*/true)
+                           .SetCardinality(CARDINALITY_OPTIONAL))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("nestedNonIndexable")
+                  .SetDataTypeDocument("Email",
+                                       /*index_nested_properties=*/false)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .Build();
+  SchemaProto schema =
+      SchemaBuilder().AddType(email_type).AddType(conversation_type).Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/true));
+  // Resolve the id through the store rather than assuming "Conversation" got
+  // id 1.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId conversation_type_id,
+                             schema_store->GetSchemaTypeId("Conversation"));
+
+  // Intermediate documents props.
+  constexpr const char* kDefinedPaths[] = {"emails", "nestedNonIndexable"};
+  for (const char* property_path : kDefinedPaths) {
+    // Stream the path so a failure identifies the offending property.
+    EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(conversation_type_id,
+                                                        property_path))
+        << "property path: " << property_path;
+  }
+}
+
+// Verifies that IsPropertyDefinedInSchema() resolves property paths through a
+// schema cycle (A has a document property of type B, B has a document
+// property of type A), including paths that traverse the cycle more than
+// once.
+TEST_F(SchemaStoreTest, CyclePathsAreDefined) {
+  SchemaTypeConfigProto type_a =
+      SchemaTypeConfigBuilder()
+          .SetType("A")
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("subject")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("b")
+                  .SetDataTypeDocument("B", /*index_nested_properties=*/true)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .Build();
+
+  SchemaTypeConfigProto type_b =
+      SchemaTypeConfigBuilder()
+          .SetType("B")
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("body")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("a")
+                  .SetDataTypeDocument("A", /*index_nested_properties=*/false)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .Build();
+  SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/true));
+  // Resolve the ids through the store rather than assuming A=0 and B=1.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_a_id,
+                             schema_store->GetSchemaTypeId("A"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_b_id,
+                             schema_store->GetSchemaTypeId("B"));
+
+  // Paths rooted at A: its top-level properties, its nested properties in B,
+  // and B's nested properties back in A.
+  constexpr const char* kPathsFromA[] = {"subject", "b",           "b.body",
+                                         "b.a",     "b.a.subject", "b.a.b"};
+  for (const char* property_path : kPathsFromA) {
+    EXPECT_TRUE(
+        schema_store->IsPropertyDefinedInSchema(type_a_id, property_path))
+        << "type A, property path: " << property_path;
+  }
+
+  // Paths rooted at B: its top-level properties, its nested properties in A,
+  // and A's nested properties back in B.
+  constexpr const char* kPathsFromB[] = {"body", "a",        "a.subject",
+                                         "a.b",  "a.b.body", "a.b.a"};
+  for (const char* property_path : kPathsFromB) {
+    EXPECT_TRUE(
+        schema_store->IsPropertyDefinedInSchema(type_b_id, property_path))
+        << "type B, property path: " << property_path;
+  }
+}
+
+// Verifies that property paths which are valid when rooted at one type of an
+// A <-> B schema cycle are reported as undefined when they are looked up
+// against the other type.
+TEST_F(SchemaStoreTest, WrongTypeCyclePathsAreUndefined) {
+  SchemaTypeConfigProto type_a =
+      SchemaTypeConfigBuilder()
+          .SetType("A")
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("subject")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("b")
+                  .SetDataTypeDocument("B", /*index_nested_properties=*/true)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .Build();
+
+  SchemaTypeConfigProto type_b =
+      SchemaTypeConfigBuilder()
+          .SetType("B")
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("body")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("a")
+                  .SetDataTypeDocument("A", /*index_nested_properties=*/false)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .Build();
+  SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/true));
+  // Resolve the ids through the store rather than assuming A=0 and B=1; this
+  // test is only meaningful if each id really maps to the intended type.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_a_id,
+                             schema_store->GetSchemaTypeId("A"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_b_id,
+                             schema_store->GetSchemaTypeId("B"));
+
+  // Paths that are rooted at A (top-level, nested in B, nested back in A),
+  // looked up against B instead.
+  constexpr const char* kPathsFromA[] = {"subject", "b",           "b.body",
+                                         "b.a",     "b.a.subject", "b.a.b"};
+  for (const char* property_path : kPathsFromA) {
+    EXPECT_FALSE(
+        schema_store->IsPropertyDefinedInSchema(type_b_id, property_path))
+        << "type B, property path: " << property_path;
+  }
+
+  // Paths that are rooted at B (top-level, nested in A, nested back in B),
+  // looked up against A instead.
+  constexpr const char* kPathsFromB[] = {"body", "a",        "a.subject",
+                                         "a.b",  "a.b.body", "a.b.a"};
+  for (const char* property_path : kPathsFromB) {
+    EXPECT_FALSE(
+        schema_store->IsPropertyDefinedInSchema(type_a_id, property_path))
+        << "type A, property path: " << property_path;
+  }
+}
+
+// Verifies that, in an A <-> B schema cycle, paths that follow the cycle but
+// end in a property the reached type does not declare (or descend below a
+// string property) are reported as undefined.
+TEST_F(SchemaStoreTest, CyclePathsNonexistentPropertiesAreUndefined) {
+  SchemaTypeConfigProto type_a =
+      SchemaTypeConfigBuilder()
+          .SetType("A")
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("subject")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("b")
+                  .SetDataTypeDocument("B", /*index_nested_properties=*/true)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .Build();
+
+  SchemaTypeConfigProto type_b =
+      SchemaTypeConfigBuilder()
+          .SetType("B")
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("body")
+                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+                  .SetCardinality(CARDINALITY_REQUIRED))
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("a")
+                  .SetDataTypeDocument("A", /*index_nested_properties=*/false)
+                  .SetCardinality(CARDINALITY_OPTIONAL))
+          .Build();
+  SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+  ICING_ASSERT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/false,
+      /*allow_circular_schema_definitions=*/true));
+  // Resolve the ids through the store rather than assuming A=0 and B=1.
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_a_id,
+                             schema_store->GetSchemaTypeId("A"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_b_id,
+                             schema_store->GetSchemaTypeId("B"));
+
+  // Undefined paths in A
+  constexpr const char* kUndefinedInA[] = {"b.subject", "b.a.body", "b.a.a",
+                                           "b.a.subject.b"};
+  for (const char* property_path : kUndefinedInA) {
+    EXPECT_FALSE(
+        schema_store->IsPropertyDefinedInSchema(type_a_id, property_path))
+        << "type A, property path: " << property_path;
+  }
+
+  // Undefined paths in B
+  constexpr const char* kUndefinedInB[] = {"a.body", "a.b.subject", "a.b.b",
+                                           "a.b.body.a"};
+  for (const char* property_path : kUndefinedInB) {
+    EXPECT_FALSE(
+        schema_store->IsPropertyDefinedInSchema(type_b_id, property_path))
+        << "type B, property path: " << property_path;
+  }
+}
+
+// Sets a schema containing a TOKENIZER_RFC822 property (per the original
+// author's note, a rollback-incompatible schema that triggers creation of an
+// overlay schema), then re-creates the store and checks that:
+//   * GetSchema() still returns the schema that was set,
+//   * the overlay schema file exists, and
+//   * the base schema file holds the same schema with "propRfc" reduced to a
+//     plain TYPE_STRING property.
+TEST_F(SchemaStoreTest, LoadsOverlaySchemaOnInit) {
+  // "prop0" is declared identically on both types.
+  PropertyConfigBuilder prop0_builder =
+      PropertyConfigBuilder()
+          .SetName("prop0")
+          .SetCardinality(CARDINALITY_OPTIONAL)
+          .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+  PropertyConfigBuilder rfc_builder =
+      PropertyConfigBuilder()
+          .SetName("propRfc")
+          .SetCardinality(CARDINALITY_OPTIONAL)
+          .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822);
+  SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder()
+                                     .SetType("type_a")
+                                     .AddProperty(prop0_builder)
+                                     .AddProperty(rfc_builder)
+                                     .Build();
+  SchemaTypeConfigProto type_b = SchemaTypeConfigBuilder()
+                                     .SetType("type_b")
+                                     .AddProperty(prop0_builder)
+                                     .Build();
+  SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+  {
+    // First store instance: set the schema and read it back.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<SchemaStore> first_store,
+        SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+    ICING_ASSERT_OK(first_store->SetSchema(
+        schema, /*ignore_errors_and_delete_documents=*/false,
+        /*allow_circular_schema_definitions=*/false));
+
+    EXPECT_THAT(first_store->GetSchema(),
+                IsOkAndHolds(Pointee(EqualsProto(schema))));
+  }
+
+  {
+    // Second store instance over the same directory: the schema that was set
+    // must still be what GetSchema() returns.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<SchemaStore> second_store,
+        SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+    EXPECT_THAT(second_store->GetSchema(),
+                IsOkAndHolds(Pointee(EqualsProto(schema))));
+
+    // The overlay file must be present on disk.
+    const std::string overlay_path = schema_store_dir_ + "/overlay_schema.pb";
+    ASSERT_TRUE(filesystem_.FileExists(overlay_path.c_str()));
+
+    // The base schema file is expected to hold "propRfc" as a plain string
+    // property, with everything else unchanged.
+    PropertyConfigBuilder plain_rfc_builder =
+        PropertyConfigBuilder()
+            .SetName("propRfc")
+            .SetCardinality(CARDINALITY_OPTIONAL)
+            .SetDataType(TYPE_STRING);
+    SchemaTypeConfigProto downgraded_type_a = SchemaTypeConfigBuilder()
+                                                  .SetType("type_a")
+                                                  .AddProperty(prop0_builder)
+                                                  .AddProperty(plain_rfc_builder)
+                                                  .Build();
+    SchemaProto expected_base_schema =
+        SchemaBuilder().AddType(downgraded_type_a).AddType(type_b).Build();
+
+    // Read the base schema file directly, bypassing the SchemaStore.
+    const std::string base_path = schema_store_dir_ + "/schema.pb";
+    auto base_schema_file =
+        std::make_unique<FileBackedProto<SchemaProto>>(filesystem_, base_path);
+    ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* base_schema,
+                               base_schema_file->Read());
+    EXPECT_THAT(*base_schema, EqualsProto(expected_base_schema));
+  }
+}
+
+// Sets a schema that only uses TOKENIZER_PLAIN string properties (per the
+// original author's note, one that does not require an overlay), then
+// re-creates the store and checks that GetSchema() returns the same schema
+// and that no overlay schema file exists.
+TEST_F(SchemaStoreTest, LoadsBaseSchemaWithNoOverlayOnInit) {
+  // "prop0" is declared identically on both types.
+  PropertyConfigBuilder prop0_builder =
+      PropertyConfigBuilder()
+          .SetName("prop0")
+          .SetCardinality(CARDINALITY_OPTIONAL)
+          .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+  PropertyConfigBuilder rfc_builder =
+      PropertyConfigBuilder()
+          .SetName("propRfc")
+          .SetCardinality(CARDINALITY_OPTIONAL)
+          .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+  SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder()
+                                     .SetType("type_a")
+                                     .AddProperty(prop0_builder)
+                                     .AddProperty(rfc_builder)
+                                     .Build();
+  SchemaTypeConfigProto type_b = SchemaTypeConfigBuilder()
+                                     .SetType("type_b")
+                                     .AddProperty(prop0_builder)
+                                     .Build();
+  SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+  {
+    // First store instance: set the schema and read it back.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<SchemaStore> first_store,
+        SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+    ICING_ASSERT_OK(first_store->SetSchema(
+        schema, /*ignore_errors_and_delete_documents=*/false,
+        /*allow_circular_schema_definitions=*/false));
+
+    EXPECT_THAT(first_store->GetSchema(),
+                IsOkAndHolds(Pointee(EqualsProto(schema))));
+  }
+
+  {
+    // Second store instance over the same directory: the schema that was set
+    // must still be what GetSchema() returns.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<SchemaStore> second_store,
+        SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+    EXPECT_THAT(second_store->GetSchema(),
+                IsOkAndHolds(Pointee(EqualsProto(schema))));
+
+    // No overlay file should have been written for this schema.
+    const std::string overlay_path = schema_store_dir_ + "/overlay_schema.pb";
+    ASSERT_FALSE(filesystem_.FileExists(overlay_path.c_str()));
+  }
+}
+
+// Sets a schema containing a TOKENIZER_RFC822 property (per the original
+// author's note, a rollback-incompatible schema that triggers creation of a
+// backup schema), deletes "schema.pb" from the store directory, and checks
+// that re-creating the store fails with INTERNAL.
+TEST_F(SchemaStoreTest, LoadSchemaBackupSchemaMissing) {
+  // "prop0" is declared identically on both types.
+  PropertyConfigBuilder prop0_builder =
+      PropertyConfigBuilder()
+          .SetName("prop0")
+          .SetCardinality(CARDINALITY_OPTIONAL)
+          .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+  PropertyConfigBuilder rfc_builder =
+      PropertyConfigBuilder()
+          .SetName("propRfc")
+          .SetCardinality(CARDINALITY_OPTIONAL)
+          .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822);
+  SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder()
+                                     .SetType("type_a")
+                                     .AddProperty(prop0_builder)
+                                     .AddProperty(rfc_builder)
+                                     .Build();
+  SchemaTypeConfigProto type_b = SchemaTypeConfigBuilder()
+                                     .SetType("type_b")
+                                     .AddProperty(prop0_builder)
+                                     .Build();
+  SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+  {
+    // First store instance: set the schema and read it back.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<SchemaStore> first_store,
+        SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+    ICING_ASSERT_OK(first_store->SetSchema(
+        schema, /*ignore_errors_and_delete_documents=*/false,
+        /*allow_circular_schema_definitions=*/false));
+
+    EXPECT_THAT(first_store->GetSchema(),
+                IsOkAndHolds(Pointee(EqualsProto(schema))));
+  }
+
+  // Remove the backup schema file from disk.
+  const std::string backup_path = schema_store_dir_ + "/schema.pb";
+  ASSERT_TRUE(filesystem_.DeleteFile(backup_path.c_str()));
+
+  // With the backup schema gone, creating a new store instance must fail.
+  EXPECT_THAT(
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+      StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+// Sets a schema containing a TOKENIZER_RFC822 property (per the original
+// author's note, a rollback-incompatible schema that triggers creation of a
+// backup schema), deletes "overlay_schema.pb" from the store directory, and
+// checks that re-creating the store fails with INTERNAL.
+TEST_F(SchemaStoreTest, LoadSchemaOverlaySchemaMissing) {
+  // "prop0" is declared identically on both types.
+  PropertyConfigBuilder prop0_builder =
+      PropertyConfigBuilder()
+          .SetName("prop0")
+          .SetCardinality(CARDINALITY_OPTIONAL)
+          .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+  PropertyConfigBuilder rfc_builder =
+      PropertyConfigBuilder()
+          .SetName("propRfc")
+          .SetCardinality(CARDINALITY_OPTIONAL)
+          .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822);
+  SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder()
+                                     .SetType("type_a")
+                                     .AddProperty(prop0_builder)
+                                     .AddProperty(rfc_builder)
+                                     .Build();
+  SchemaTypeConfigProto type_b = SchemaTypeConfigBuilder()
+                                     .SetType("type_b")
+                                     .AddProperty(prop0_builder)
+                                     .Build();
+  SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+  {
+    // First store instance: set the schema and read it back.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<SchemaStore> first_store,
+        SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+    ICING_ASSERT_OK(first_store->SetSchema(
+        schema, /*ignore_errors_and_delete_documents=*/false,
+        /*allow_circular_schema_definitions=*/false));
+
+    EXPECT_THAT(first_store->GetSchema(),
+                IsOkAndHolds(Pointee(EqualsProto(schema))));
+  }
+
+  // Remove the overlay schema file from disk.
+  const std::string overlay_path = schema_store_dir_ + "/overlay_schema.pb";
+  ASSERT_TRUE(filesystem_.DeleteFile(overlay_path.c_str()));
+
+  // With the overlay schema gone although it was expected to be there,
+  // creating a new store instance must fail.
+  EXPECT_THAT(
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+      StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+// Sets a schema containing a TOKENIZER_RFC822 property (per the original
+// author's note, a rollback-incompatible schema that triggers creation of a
+// backup schema), deletes "schema_store_header" from the store directory, and
+// checks that re-creating the store fails with INTERNAL.
+TEST_F(SchemaStoreTest, LoadSchemaHeaderMissing) {
+  // "prop0" is declared identically on both types.
+  PropertyConfigBuilder prop0_builder =
+      PropertyConfigBuilder()
+          .SetName("prop0")
+          .SetCardinality(CARDINALITY_OPTIONAL)
+          .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+  PropertyConfigBuilder rfc_builder =
+      PropertyConfigBuilder()
+          .SetName("propRfc")
+          .SetCardinality(CARDINALITY_OPTIONAL)
+          .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822);
+  SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder()
+                                     .SetType("type_a")
+                                     .AddProperty(prop0_builder)
+                                     .AddProperty(rfc_builder)
+                                     .Build();
+  SchemaTypeConfigProto type_b = SchemaTypeConfigBuilder()
+                                     .SetType("type_b")
+                                     .AddProperty(prop0_builder)
+                                     .Build();
+  SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+  {
+    // First store instance: set the schema and read it back.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<SchemaStore> first_store,
+        SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+    ICING_ASSERT_OK(first_store->SetSchema(
+        schema, /*ignore_errors_and_delete_documents=*/false,
+        /*allow_circular_schema_definitions=*/false));
+
+    EXPECT_THAT(first_store->GetSchema(),
+                IsOkAndHolds(Pointee(EqualsProto(schema))));
+  }
+
+  // Remove the schema store header file from disk.
+  const std::string header_path = schema_store_dir_ + "/schema_store_header";
+  ASSERT_TRUE(filesystem_.DeleteFile(header_path.c_str()));
+
+  // With the header gone, creating a new store instance must fail.
+  EXPECT_THAT(
+      SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+      StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(SchemaStoreTest, LoadSchemaNoOverlayHeaderMissing) {
+ // Create a normal schema (TOKENIZER_PLAIN only) that won't require a backup.
+ PropertyConfigBuilder indexed_string_property_builder =
+ PropertyConfigBuilder()
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN);
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("type_b")
+ .AddProperty(indexed_string_property_builder.SetName("prop0"))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Delete the schema header.
+ std::string schema_header_path = schema_store_dir_ + "/schema_store_header";
+ ASSERT_TRUE(filesystem_.DeleteFile(schema_header_path.c_str()));
+
+ {
+ // Create a new instance of the schema store and check that it fails because
+ // the schema header (which is now a part of the ground truth) is not
+ // available.
+ EXPECT_THAT(
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaCompatibleNoChange) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kCompatible,
+ version_util::kVersion));
+
+ {
+ // Create a new instance of the schema store and check that the same schema
+ // is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaUpgradeNoChange) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kUpgrade,
+ version_util::kVersion + 1));
+
+ {
+ // Create a new instance of the schema store and check that the same schema
+ // is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaVersionZeroUpgradeNoChange) {
+ // Because we are upgrading from version zero, the schema must be compatible
+ // with version zero.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ ICING_EXPECT_OK(
+ SchemaStore::MigrateSchema(&filesystem_, schema_store_dir_,
+ version_util::StateChange::kVersionZeroUpgrade,
+ version_util::kVersion + 1));
+
+ {
+ // Create a new instance of the schema store and check that the same schema
+ // is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaRollbackDiscardsOverlaySchema) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Rollback to a version before kVersion. The schema header will declare that
+ // the overlay is compatible with any version starting with kVersion. So
+ // kVersion - 1 is incompatible and will throw out the overlay schema.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
+ version_util::kVersion - 1));
+
+ {
+ // Create a new instance of the schema store and check that we fell back to
+ // the base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ SchemaTypeConfigProto other_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build();
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaCompatibleRollbackKeepsOverlaySchema) {
+ // Create a schema that is rollback incompatible and will trigger us to create
+ // a backup schema.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Rollback to kVersion. The schema header will declare that the overlay is
+ // compatible with any version starting with kVersion. So we will be
+ // compatible and retain the overlay schema.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
+ version_util::kVersion));
+
+ {
+ // Create a new instance of the schema store and check that the same schema
+ // is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaRollforwardRetainsBaseSchema) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Rollback to a version before kVersion. The schema header will declare that
+ // the overlay is compatible with any version starting with kVersion. So
+ // kVersion - 1 is incompatible and will throw out the overlay schema.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
+ version_util::kVersion - 1));
+
+ SchemaTypeConfigProto other_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build();
+
+ {
+ // Create a new instance of the schema store and check that we fell back to
+ // the base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+
+ // Now rollforward to a new version. This should accept whatever schema is
+ // present (currently base schema)
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollForward,
+ version_util::kVersion));
+ {
+ // Create a new instance of the schema store and check that the base schema
+ // is still present after the rollforward.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaRollforwardRetainsOverlaySchema) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Rollback to kVersion. The schema header will declare that the overlay is
+ // compatible with any version starting with kVersion. So we will be
+ // compatible and retain the overlay schema.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
+ version_util::kVersion));
+
+ {
+ // Create a new instance of the schema store and check that the same schema
+ // is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // Now rollforward to a new version. This should accept whatever schema is
+ // present (currently overlay schema)
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kRollForward,
+ version_util::kVersion));
+ {
+ // Create a new instance of the schema store and check that the same schema
+ // is present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest,
+ MigrateSchemaVersionZeroRollforwardDiscardsOverlaySchema) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // A VersionZeroRollforward will always discard the overlay schema because it
+ // could be stale.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_,
+ version_util::StateChange::kVersionZeroRollForward,
+ version_util::kVersion));
+
+ SchemaTypeConfigProto other_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build();
+
+ {
+ // Create a new instance of the schema store and check that we fell back to
+ // the base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+}
+
+TEST_F(SchemaStoreTest, MigrateSchemaVersionUndeterminedDiscardsOverlaySchema) {
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
+ {
+ // Create an instance of the schema store and set the schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(schema))));
+ }
+
+ // An Undetermined state change will always discard the overlay schema because
+ // we don't know which state we're in, so we fall back to the base schema,
+ // which should always be valid.
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema(
+ &filesystem_, schema_store_dir_, version_util::StateChange::kUndetermined,
+ version_util::kVersion));
+
+ SchemaTypeConfigProto other_type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("type_a")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("propRfc")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataType(TYPE_STRING))
+ .Build();
+ SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build();
+
+ {
+ // Create a new instance of the schema store and check that we fell back to
+ // the base schema.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+
+ EXPECT_THAT(schema_store->GetSchema(),
+ IsOkAndHolds(Pointee(EqualsProto(base_schema))));
+ }
+}
+
} // namespace
} // namespace lib
diff --git a/icing/schema/schema-type-manager.cc b/icing/schema/schema-type-manager.cc
index 7882db5..f3a86d4 100644
--- a/icing/schema/schema-type-manager.cc
+++ b/icing/schema/schema-type-manager.cc
@@ -15,6 +15,7 @@
#include "icing/schema/schema-type-manager.h"
#include <memory>
+#include <utility>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
diff --git a/icing/schema/schema-type-manager.h b/icing/schema/schema-type-manager.h
index dc5f799..f2adbd9 100644
--- a/icing/schema/schema-type-manager.h
+++ b/icing/schema/schema-type-manager.h
@@ -16,6 +16,9 @@
#define ICING_SCHEMA_SCHEMA_TYPE_MANAGER_H_
#include <memory>
+#include <string>
+#include <unordered_set>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/schema/joinable-property-manager.h"
@@ -30,6 +33,10 @@ namespace lib {
// This class is a wrapper of SectionManager and JoinablePropertyManager.
class SchemaTypeManager {
public:
+ // Schema type ids are contiguous, and so we use a vector instead of an
+ // unordered map for the mappings.
+ using SchemaTypeIdToPropertiesVector =
+ std::vector<std::unordered_set<std::string>>;
// Factory function to create a SchemaTypeManager which does not take
// ownership of any input components, and all pointers must refer to valid
// objects that outlive the created SchemaTypeManager instance.
diff --git a/icing/schema/schema-type-manager_test.cc b/icing/schema/schema-type-manager_test.cc
index 93cbdee..eafc612 100644
--- a/icing/schema/schema-type-manager_test.cc
+++ b/icing/schema/schema-type-manager_test.cc
@@ -41,6 +41,7 @@ using ::testing::Pointee;
// type and property names of EmailMessage
static constexpr char kTypeEmail[] = "EmailMessage";
+static constexpr SchemaTypeId kTypeEmailSchemaId = 0;
// indexable (in lexicographical order)
static constexpr char kPropertyRecipientIds[] = "recipientIds";
static constexpr char kPropertyRecipients[] = "recipients";
@@ -57,6 +58,7 @@ static constexpr char kPropertyText[] = "text";
// type and property names of Conversation
static constexpr char kTypeConversation[] = "Conversation";
+static constexpr SchemaTypeId kTypeConversationSchemaId = 1;
// indexable (in lexicographical order)
static constexpr char kPropertyEmails[] = "emails";
static constexpr char kPropertyGroupQualifiedId[] =
@@ -208,8 +210,9 @@ TEST_F(SchemaTypeManagerTest, Create) {
DynamicTrieKeyMapper<SchemaTypeId>::Create(
filesystem_, test_dir_ + "/schema_type_mapper",
/*maximum_size_bytes=*/3 * 128 * 1024));
- ICING_ASSERT_OK(schema_type_mapper->Put(kTypeEmail, 0));
- ICING_ASSERT_OK(schema_type_mapper->Put(kTypeConversation, 1));
+ ICING_ASSERT_OK(schema_type_mapper->Put(kTypeEmail, kTypeEmailSchemaId));
+ ICING_ASSERT_OK(
+ schema_type_mapper->Put(kTypeConversation, kTypeConversationSchemaId));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaTypeManager> schema_type_manager,
@@ -237,6 +240,7 @@ TEST_F(SchemaTypeManagerTest, Create) {
EqualsSectionMetadata(/*expected_id=*/4,
/*expected_property_path=*/"timestamp",
CreateTimestampPropertyConfig())))));
+
// In the Conversation type, "groupQualifiedId" and "name" are indexable
// properties as are the indexable properties of the email in the "emails"
// property. All properties of the email in the "nestedNonIndexable" property
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
index f3f7aad..c85cc87 100644
--- a/icing/schema/schema-util.cc
+++ b/icing/schema/schema-util.cc
@@ -14,6 +14,7 @@
#include "icing/schema/schema-util.h"
+#include <algorithm>
#include <cstdint>
#include <queue>
#include <string>
@@ -21,13 +22,13 @@
#include <unordered_map>
#include <unordered_set>
#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/absl_ports/annotate.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/absl_ports/str_join.h"
-#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/util/logging.h"
@@ -158,97 +159,273 @@ void AddIncompatibleChangeToDelta(
}
}
+// Returns true if C1 <= C2 under the ordering below, where C1 and C2 are
+// cardinalities (REQUIRED, OPTIONAL or REPEATED); equal values compare true.
+//
+// Rule: REQUIRED < OPTIONAL < REPEATED
+bool CardinalityLessThanEq(PropertyConfigProto::Cardinality::Code C1,
+ PropertyConfigProto::Cardinality::Code C2) {
+ if (C1 == C2) {
+ return true;
+ }
+ if (C1 == PropertyConfigProto::Cardinality::REQUIRED) {
+ return C2 == PropertyConfigProto::Cardinality::OPTIONAL ||
+ C2 == PropertyConfigProto::Cardinality::REPEATED;
+ }
+ if (C1 == PropertyConfigProto::Cardinality::OPTIONAL) {
+ return C2 == PropertyConfigProto::Cardinality::REPEATED;
+ }
+ return false;
+}
+
} // namespace
-libtextclassifier3::Status ExpandTranstiveDependents(
- const SchemaUtil::DependentMap& dependent_map, std::string_view type,
- SchemaUtil::DependentMap* expanded_dependent_map,
+libtextclassifier3::Status CalculateTransitiveNestedTypeRelations(
+ const SchemaUtil::DependentMap& direct_nested_types_map,
+ const std::unordered_set<std::string_view>& joinable_types,
+ std::string_view type, bool path_contains_joinable_property,
+ SchemaUtil::DependentMap* expanded_nested_types_map,
+ std::unordered_map<std::string_view, bool>&&
+ pending_expansion_paths_indexable,
+ std::unordered_set<std::string_view>* sink_types) {
+ // TODO(b/280698121): Implement optimizations to this code to avoid reentering
+ // a node after it's already been expanded.
+
+ auto itr = direct_nested_types_map.find(type);
+ if (itr == direct_nested_types_map.end()) {
+ // `type` has no entry in direct_nested_types_map: it's a sink node. Record it.
+ sink_types->insert(type);
+ return libtextclassifier3::Status::OK;
+ }
+ std::unordered_map<std::string_view, std::vector<const PropertyConfigProto*>>
+ expanded_relations;
+
+ // Add all of the adjacent outgoing relations.
+ expanded_relations.reserve(itr->second.size());
+ expanded_relations.insert(itr->second.begin(), itr->second.end());
+
+ // Iterate through each adjacent outgoing relation and add their indirect
+ // outgoing relations.
+ for (const auto& [adjacent_type, adjacent_property_protos] : itr->second) {
+ // Copy pending_expansion_paths_indexable so edits stay local to this edge.
+ std::unordered_map<std::string_view, bool> pending_expansion_paths_copy(
+ pending_expansion_paths_indexable);
+
+ // 1. Check the nested indexable config of the edge (type -> adjacent_type),
+ // and the joinable config of the current path up to adjacent_type.
+ //
+ // The nested indexable config is true if any of the PropertyConfigProtos
+ // representing the connecting edge has index_nested_properties=true.
+ bool is_edge_nested_indexable = std::any_of(
+ adjacent_property_protos.begin(), adjacent_property_protos.end(),
+ [](const PropertyConfigProto* property_config) {
+ return property_config->document_indexing_config()
+ .index_nested_properties();
+ });
+ // TODO(b/265304217): change this once we add joinable_properties_list.
+ // Check if addition of the new edge (type->adjacent_type) makes the path
+ // joinable.
+ bool new_path_contains_joinable_property =
+ joinable_types.count(type) > 0 || path_contains_joinable_property;
+ // Record on the path whether the edge leaving `type` is nested indexable.
+ pending_expansion_paths_copy[type] = is_edge_nested_indexable;
+
+ // If is_edge_nested_indexable=false, then all paths to adjacent_type
+ // currently in the pending_expansions map are also not nested indexable.
+ if (!is_edge_nested_indexable) {
+ for (auto& pending_expansion : pending_expansion_paths_copy) {
+ pending_expansion.second = false;
+ }
+ }
+
+ // 2. Check if we're in the middle of expanding this type - IOW
+ // there's a cycle!
+ //
+ // This cycle is not allowed if either:
+ // 1. The cycle starting at adjacent_type is nested indexable, OR
+ // 2. The current path contains a joinable property.
+ auto adjacent_itr = pending_expansion_paths_copy.find(adjacent_type);
+ if (adjacent_itr != pending_expansion_paths_copy.end()) {
+ if (adjacent_itr->second || new_path_contains_joinable_property) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Invalid cycle detected in type configs. '", type,
+ "' references itself and is nested-indexable or nested-joinable."));
+ }
+ // The cycle is allowed and there's no need to keep iterating the loop.
+ // Move on to the next adjacent value.
+ continue;
+ }
+
+ // 3. Expand this type as needed.
+ ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
+ direct_nested_types_map, joinable_types, adjacent_type,
+ new_path_contains_joinable_property, expanded_nested_types_map,
+ std::move(pending_expansion_paths_copy), sink_types));
+ if (sink_types->count(adjacent_type) > 0) {
+ // "adjacent" is a sink node. Just skip to the next.
+ continue;
+ }
+
+ // 4. "adjacent" has been fully expanded. Add all of its transitive
+ // outgoing relations to this type's transitive outgoing relations.
+ auto adjacent_expanded_itr = expanded_nested_types_map->find(adjacent_type);
+ expanded_relations.reserve(expanded_relations.size() +
+ adjacent_expanded_itr->second.size());
+ for (const auto& [transitive_reachable, _] :
+ adjacent_expanded_itr->second) {
+ // Insert a transitive reachable node `transitive_reachable` for `type` if
+ // it wasn't previously reachable.
+ // Since there is no direct edge between `type` and `transitive_reachable`
+ // we insert an empty vector into the dependent map.
+ expanded_relations.insert({transitive_reachable, {}});
+ }
+ }
+ for (const auto& kvp : expanded_relations) {
+ expanded_nested_types_map->operator[](type).insert(kvp);
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+libtextclassifier3::Status CalculateAcyclicTransitiveRelations(
+ const SchemaUtil::TypeRelationMap<T>& direct_relation_map,
+ std::string_view type,
+ SchemaUtil::TypeRelationMap<T>* expanded_relation_map,
std::unordered_set<std::string_view>* pending_expansions,
- std::unordered_set<std::string_view>* orphaned_types) {
- auto expanded_itr = expanded_dependent_map->find(type);
- if (expanded_itr != expanded_dependent_map->end()) {
+ std::unordered_set<std::string_view>* sink_types) {
+ auto expanded_itr = expanded_relation_map->find(type);
+ if (expanded_itr != expanded_relation_map->end()) {
// We've already expanded this type. Just return.
return libtextclassifier3::Status::OK;
}
- auto itr = dependent_map.find(type);
- if (itr == dependent_map.end()) {
- // It's an orphan. Just return.
- orphaned_types->insert(type);
+ auto itr = direct_relation_map.find(type);
+ if (itr == direct_relation_map.end()) {
+ // It's a sink node. Just return.
+ sink_types->insert(type);
return libtextclassifier3::Status::OK;
}
pending_expansions->insert(type);
- std::unordered_map<std::string_view, std::vector<const PropertyConfigProto*>>
- expanded_dependents;
+ std::unordered_map<std::string_view, T> expanded_relations;
- // Add all of the direct dependents.
- expanded_dependents.reserve(itr->second.size());
- expanded_dependents.insert(itr->second.begin(), itr->second.end());
+ // Add all of the adjacent outgoing relations.
+ expanded_relations.reserve(itr->second.size());
+ expanded_relations.insert(itr->second.begin(), itr->second.end());
- // Iterate through each direct dependent and add their indirect dependents.
- for (const auto& [dep, _] : itr->second) {
+ // Iterate through each adjacent outgoing relation and add their indirect
+ // outgoing relations.
+ for (const auto& [adjacent, _] : itr->second) {
// 1. Check if we're in the middle of expanding this type - IOW there's a
// cycle!
- if (pending_expansions->count(dep) > 0) {
+ if (pending_expansions->count(adjacent) > 0) {
return absl_ports::InvalidArgumentError(
- absl_ports::StrCat("Infinite loop detected in type configs. '", type,
- "' references itself."));
+ absl_ports::StrCat("Invalid cycle detected in type configs. '", type,
+ "' references or inherits from itself."));
}
// 2. Expand this type as needed.
- ICING_RETURN_IF_ERROR(
- ExpandTranstiveDependents(dependent_map, dep, expanded_dependent_map,
- pending_expansions, orphaned_types));
- if (orphaned_types->count(dep) > 0) {
- // Dep is an orphan. Just skip to the next dep.
+ ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
+ direct_relation_map, adjacent, expanded_relation_map,
+ pending_expansions, sink_types));
+ if (sink_types->count(adjacent) > 0) {
+ // "adjacent" is a sink node. Just skip to the next.
continue;
}
- // 3. Dep has been fully expanded. Add all of its dependents to this
- // type's dependents.
- auto dep_expanded_itr = expanded_dependent_map->find(dep);
- expanded_dependents.reserve(expanded_dependents.size() +
- dep_expanded_itr->second.size());
- for (const auto& [dep_dependent, _] : dep_expanded_itr->second) {
- // Insert a transitive dependent `dep_dependent` for `type`. Also since
- // there is no direct edge between `type` and `dep_dependent`, the direct
- // edge (i.e. PropertyConfigProto*) vector is empty.
- expanded_dependents.insert({dep_dependent, {}});
+ // 3. "adjacent" has been fully expanded. Add all of its transitive outgoing
+ // relations to this type's transitive outgoing relations.
+ auto adjacent_expanded_itr = expanded_relation_map->find(adjacent);
+ expanded_relations.reserve(expanded_relations.size() +
+ adjacent_expanded_itr->second.size());
+ for (const auto& [transitive_reachable, _] :
+ adjacent_expanded_itr->second) {
+ // Insert a transitive reachable node `transitive_reachable` for `type`.
+ // Also since there is no direct edge between `type` and
+ // `transitive_reachable`, the direct edge is initialized by default.
+ expanded_relations.insert({transitive_reachable, T()});
}
}
- expanded_dependent_map->insert({type, std::move(expanded_dependents)});
+ expanded_relation_map->insert({type, std::move(expanded_relations)});
pending_expansions->erase(type);
return libtextclassifier3::Status::OK;
}
-// Calculate and return the transitive closure of dependent_map, which expands
-// the dependent_map to also include indirect dependents
+// Calculate and return the expanded nested-type map from
+// direct_nested_type_map. This expands the direct_nested_type_map to also
+// include indirect nested-type relations.
//
-// Ex. Suppose we have a schema with three types A, B and C, and we have the
-// following dependent relationship.
+// Ex. Suppose we have the following relations in direct_nested_type_map.
//
-// C -> B (B depends on C)
-// B -> A (A depends on B)
+// C -> B (Schema type B has a document property of type C)
+// B -> A (Schema type A has a document property of type B)
//
// Then, this function would expand the map by adding C -> A to the map.
libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
-ExpandTranstiveDependents(const SchemaUtil::DependentMap& dependent_map) {
- SchemaUtil::DependentMap expanded_dependent_map;
+CalculateTransitiveNestedTypeRelations(
+ const SchemaUtil::DependentMap& direct_nested_type_map,
+ const std::unordered_set<std::string_view>& joinable_types,
+ bool allow_circular_schema_definitions) {
+ SchemaUtil::DependentMap expanded_nested_type_map;
+ // Types that have no outgoing relations.
+ std::unordered_set<std::string_view> sink_types;
+
+ if (allow_circular_schema_definitions) {
+ // Map of nodes that are pending expansion -> whether the path from each key
+ // node to the 'current' node is nested_indexable.
+ // A copy of this map is made for each new node that we expand.
+ std::unordered_map<std::string_view, bool>
+ pending_expansion_paths_indexable;
+ for (const auto& kvp : direct_nested_type_map) {
+ ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations(
+ direct_nested_type_map, joinable_types, kvp.first,
+ /*path_contains_joinable_property=*/false, &expanded_nested_type_map,
+ std::unordered_map<std::string_view, bool>(
+ pending_expansion_paths_indexable),
+ &sink_types));
+ }
+ } else {
+ // If allow_circular_schema_definitions is false, then fallback to the old
+ // way of detecting cycles.
+ // Types that we are expanding.
+ std::unordered_set<std::string_view> pending_expansions;
+ for (const auto& kvp : direct_nested_type_map) {
+ ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
+ direct_nested_type_map, kvp.first, &expanded_nested_type_map,
+ &pending_expansions, &sink_types));
+ }
+ }
+ return expanded_nested_type_map;
+}
+
+// Calculate and return the expanded inheritance map from
+// direct_inheritance_map. This expands the direct_inheritance_map to also
+// include indirect inheritance relations.
+//
+// Ex. Suppose we have the following relations in direct_inheritance_map.
+//
+// C -> B (Schema type C is B's parent_type)
+// B -> A (Schema type B is A's parent_type)
+//
+// Then, this function would expand the map by adding C -> A to the map.
+libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
+CalculateTransitiveInheritanceRelations(
+ const SchemaUtil::InheritanceMap& direct_inheritance_map) {
+ SchemaUtil::InheritanceMap expanded_inheritance_map;
// Types that we are expanding.
std::unordered_set<std::string_view> pending_expansions;
- // Types that have no dependents.
- std::unordered_set<std::string_view> orphaned_types;
- for (const auto& kvp : dependent_map) {
- ICING_RETURN_IF_ERROR(ExpandTranstiveDependents(
- dependent_map, kvp.first, &expanded_dependent_map, &pending_expansions,
- &orphaned_types));
+ // Types that have no outgoing relation.
+ std::unordered_set<std::string_view> sink_types;
+ for (const auto& kvp : direct_inheritance_map) {
+ ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations(
+ direct_inheritance_map, kvp.first, &expanded_inheritance_map,
+ &pending_expansions, &sink_types));
}
- return expanded_dependent_map;
+ return expanded_inheritance_map;
}
-// Builds a transitive dependent map. 'Orphaned' types (types with no
-// dependents) will not be present in the map.
+// Builds a transitive dependent map. Types with no dependents will not be
+// present in the map as keys.
//
// Ex. Suppose we have a schema with four types A, B, C, D. A has a property of
// type B and B has a property of type C. C and D only have non-document
@@ -258,7 +435,7 @@ ExpandTranstiveDependents(const SchemaUtil::DependentMap& dependent_map) {
// C -> A, B (both A and B depend on C)
// B -> A (A depends on B)
//
-// A and D would be considered orphaned properties because no type refers to
+// A and D will not be present in the map as keys because no type depends on
// them.
//
// RETURNS:
@@ -266,8 +443,21 @@ ExpandTranstiveDependents(const SchemaUtil::DependentMap& dependent_map) {
// INVALID_ARGUMENT if the schema contains a cycle or an undefined type.
// ALREADY_EXISTS if a schema type is specified more than once in the schema
libtextclassifier3::StatusOr<SchemaUtil::DependentMap>
-BuildTransitiveDependentGraph(const SchemaProto& schema) {
- SchemaUtil::DependentMap dependent_map;
+BuildTransitiveDependentGraph(const SchemaProto& schema,
+ bool allow_circular_schema_definitions) {
+ // We expand the nested-type dependent map and inheritance map differently
+ // when calculating transitive relations. Transitivity also must not be
+ // applied across the two types of relations, so we keep them as separate
+ // maps.
+ //
+ // e.g. For schema type A, B and C, B depends on A through inheritance, and
+ // C depends on B by having a property with type B, we will have the two
+ // relations {A, B} and {B, C} in the dependent map, but will not have {A, C}
+ // in the map.
+ SchemaUtil::DependentMap direct_nested_type_map;
+ SchemaUtil::InheritanceMap direct_inheritance_map;
+
+ // Set of schema types that have at least one joinable property.
+ std::unordered_set<std::string_view> joinable_types;
// Add all first-order dependents.
std::unordered_set<std::string_view> known_types;
@@ -280,16 +470,19 @@ BuildTransitiveDependentGraph(const SchemaProto& schema) {
}
known_types.insert(schema_type);
unknown_types.erase(schema_type);
- if (!type_config.parent_type().empty()) {
- std::string_view parent_schema_type(type_config.parent_type());
+ // Insert inheritance relations into the inheritance map.
+ for (std::string_view parent_schema_type : type_config.parent_types()) {
if (known_types.count(parent_schema_type) == 0) {
unknown_types.insert(parent_schema_type);
}
- // Try to add schema_type to the parent type's dependent map when it is
- // not present already, in which case the value will be an empty vector.
- dependent_map[parent_schema_type].insert({schema_type, {}});
+ direct_inheritance_map[parent_schema_type][schema_type] = true;
}
for (const auto& property_config : type_config.properties()) {
+ if (property_config.joinable_config().value_type() !=
+ JoinableConfig::ValueType::NONE) {
+ joinable_types.insert(schema_type);
+ }
+ // Insert nested-type relations into the nested-type map.
if (property_config.data_type() ==
PropertyConfigProto::DataType::DOCUMENT) {
// Need to know what schema_type these Document properties should be
@@ -298,7 +491,7 @@ BuildTransitiveDependentGraph(const SchemaProto& schema) {
if (known_types.count(property_schema_type) == 0) {
unknown_types.insert(property_schema_type);
}
- dependent_map[property_schema_type][schema_type].push_back(
+ direct_nested_type_map[property_schema_type][schema_type].push_back(
&property_config);
}
}
@@ -307,15 +500,50 @@ BuildTransitiveDependentGraph(const SchemaProto& schema) {
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
"Undefined 'schema_type's: ", absl_ports::StrJoin(unknown_types, ",")));
}
- return ExpandTranstiveDependents(dependent_map);
+
+ // Merge two expanded maps into a single dependent_map, without making
+ // inheritance and nested-type relations transitive.
+ ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap merged_dependent_map,
+ CalculateTransitiveNestedTypeRelations(
+ direct_nested_type_map, joinable_types,
+ allow_circular_schema_definitions));
+ ICING_ASSIGN_OR_RETURN(
+ SchemaUtil::InheritanceMap expanded_inheritance_map,
+ CalculateTransitiveInheritanceRelations(direct_inheritance_map));
+ for (const auto& [parent_type, inheritance_relation] :
+ expanded_inheritance_map) {
+ // Insert the parent_type into the dependent map if it is not present
+ // already.
+ merged_dependent_map.insert({parent_type, {}});
+ merged_dependent_map[parent_type].reserve(inheritance_relation.size());
+ for (const auto& [child_type, _] : inheritance_relation) {
+ // Insert the child_type into parent_type's dependent map if it's not
+ // present already, in which case the value will be an empty vector.
+ merged_dependent_map[parent_type].insert({child_type, {}});
+ }
+ }
+ return merged_dependent_map;
+}
+
+libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
+SchemaUtil::BuildTransitiveInheritanceGraph(const SchemaProto& schema) {
+ SchemaUtil::InheritanceMap direct_inheritance_map;
+ for (const auto& type_config : schema.types()) {
+ for (std::string_view parent_schema_type : type_config.parent_types()) {
+ direct_inheritance_map[parent_schema_type][type_config.schema_type()] =
+ true;
+ }
+ }
+ return CalculateTransitiveInheritanceRelations(direct_inheritance_map);
}
libtextclassifier3::StatusOr<SchemaUtil::DependentMap> SchemaUtil::Validate(
- const SchemaProto& schema) {
+ const SchemaProto& schema, bool allow_circular_schema_definitions) {
// 1. Build the dependent map. This will detect any cycles, non-existent or
// duplicate types in the schema.
- ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap dependent_map,
- BuildTransitiveDependentGraph(schema));
+ ICING_ASSIGN_OR_RETURN(
+ SchemaUtil::DependentMap dependent_map,
+ BuildTransitiveDependentGraph(schema, allow_circular_schema_definitions));
// Tracks PropertyConfigs within a SchemaTypeConfig that we've validated
// already.
@@ -422,6 +650,9 @@ libtextclassifier3::StatusOr<SchemaUtil::DependentMap> SchemaUtil::Validate(
}
}
+ // Verify that every child type's property set has included all compatible
+ // properties from parent types.
+ ICING_RETURN_IF_ERROR(ValidateInheritedProperties(schema));
return dependent_map;
}
@@ -537,6 +768,100 @@ libtextclassifier3::Status SchemaUtil::ValidateJoinableConfig(
return libtextclassifier3::Status::OK;
}
+bool SchemaUtil::IsParent(const SchemaUtil::InheritanceMap& inheritance_map,
+ std::string_view parent_type,
+ std::string_view child_type) {
+ auto iter = inheritance_map.find(parent_type);
+ if (iter == inheritance_map.end()) {
+ return false;
+ }
+ return iter->second.count(child_type) > 0;
+}
+
+bool SchemaUtil::IsInheritedPropertyCompatible(
+ const SchemaUtil::InheritanceMap& inheritance_map,
+ const PropertyConfigProto& child_property_config,
+ const PropertyConfigProto& parent_property_config) {
+ // Check if child_property_config->cardinality() <=
+ // parent_property_config->cardinality().
+ // Subtype may require a stricter cardinality, but cannot loosen cardinality
+ // requirements.
+ if (!CardinalityLessThanEq(child_property_config.cardinality(),
+ parent_property_config.cardinality())) {
+ return false;
+ }
+
+ // The cardinality check passes. Unless both properties are document types,
+ // they are compatible only if their data types are identical.
+ if (child_property_config.data_type() !=
+ PropertyConfigProto::DataType::DOCUMENT ||
+ parent_property_config.data_type() !=
+ PropertyConfigProto::DataType::DOCUMENT) {
+ return child_property_config.data_type() ==
+ parent_property_config.data_type();
+ }
+
+ // Now we can assume T1 and T2 are both document type.
+ return child_property_config.schema_type() ==
+ parent_property_config.schema_type() ||
+ IsParent(inheritance_map, parent_property_config.schema_type(),
+ child_property_config.schema_type());
+}
+
+libtextclassifier3::Status SchemaUtil::ValidateInheritedProperties(
+ const SchemaProto& schema) {
+ // Create an inheritance map
+ ICING_ASSIGN_OR_RETURN(SchemaUtil::InheritanceMap inheritance_map,
+ BuildTransitiveInheritanceGraph(schema));
+
+ // Create a map that maps from type name to property names, and then from
+ // property names to PropertyConfigProto.
+ std::unordered_map<
+ std::string, std::unordered_map<std::string, const PropertyConfigProto*>>
+ property_map;
+ for (const SchemaTypeConfigProto& type_config : schema.types()) {
+ // Skip building entries for types without any child or parent, since
+ // such entries will never be used.
+ if (type_config.parent_types().empty() &&
+ inheritance_map.count(type_config.schema_type()) == 0) {
+ continue;
+ }
+ auto& curr_property_map = property_map[type_config.schema_type()];
+ for (const PropertyConfigProto& property_config :
+ type_config.properties()) {
+ curr_property_map[property_config.property_name()] = &property_config;
+ }
+ }
+
+ // Validate child properties.
+ for (const SchemaTypeConfigProto& type_config : schema.types()) {
+ const std::string& child_type_name = type_config.schema_type();
+ auto& child_property_map = property_map[child_type_name];
+
+ for (const std::string& parent_type_name : type_config.parent_types()) {
+ auto& parent_property_map = property_map[parent_type_name];
+
+ for (const auto& [property_name, parent_property_config] :
+ parent_property_map) {
+ auto child_property_iter = child_property_map.find(property_name);
+ if (child_property_iter == child_property_map.end()) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Property ", property_name, " is not present in child type ",
+ child_type_name, ", but it is defined in the parent type ",
+ parent_type_name, "."));
+ }
+ if (!IsInheritedPropertyCompatible(inheritance_map,
+ *child_property_iter->second,
+ *parent_property_config)) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Property ", property_name, " from child type ", child_type_name,
+ " is not compatible to the parent type ", parent_type_name, "."));
+ }
+ }
+ }
+ }
+ return libtextclassifier3::Status::OK;
+}
+
void SchemaUtil::BuildTypeConfigMap(
const SchemaProto& schema, SchemaUtil::TypeConfigMap* type_config_map) {
type_config_map->clear();
diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h
index 825625e..445affd 100644
--- a/icing/schema/schema-util.h
+++ b/icing/schema/schema-util.h
@@ -33,6 +33,14 @@ class SchemaUtil {
using TypeConfigMap =
std::unordered_map<std::string, const SchemaTypeConfigProto>;
+ // A data structure that stores the relationships between schema types. The
+ // keys in TypeRelationMap are schema types, and the values are maps whose
+ // keys are the schema types that are directly or indirectly related to the
+ // key, each mapped to a value of type T describing that relation.
+ template <typename T>
+ using TypeRelationMap =
+ std::unordered_map<std::string_view,
+ std::unordered_map<std::string_view, T>>;
+
// If A -> B is indicated in the map, then type A must be built before
// building type B, which implies one of the following situations.
//
@@ -48,10 +56,16 @@ class SchemaUtil {
// C -> B with valid PropertyConfigProto* respectively in this map, but we
// will also expand transitive dependents: add A -> B into dependent map with
// empty vector of "edges".
- using DependentMap = std::unordered_map<
- std::string_view,
- std::unordered_map<std::string_view,
- std::vector<const PropertyConfigProto*>>>;
+ using DependentMap = TypeRelationMap<std::vector<const PropertyConfigProto*>>;
+
+ // If A -> B is indicated in the map, then type A is a parent type of B,
+ // directly or indirectly. If directly, the bool value in the map will be
+ // true, otherwise false.
+ //
+ // Note that all relationships contained in this map are also entries in the
+ // DependentMap, i.e. if B inherits from A, then there will be a mapping from
+ // A to B in both this map and the DependentMap.
+ using InheritanceMap = TypeRelationMap<bool>;
struct SchemaDelta {
// Which schema types were present in the old schema, but were deleted from
@@ -124,18 +138,25 @@ class SchemaUtil {
// SchemaTypeConfigProto.schema_type
// 10. Property names can only be alphanumeric.
// 11. Any STRING data types have a valid string_indexing_config
- // 12. A SchemaTypeConfigProto cannot have a property whose schema_type is
- // itself, thus creating an infinite loop.
- // 13. Two SchemaTypeConfigProtos cannot have properties that reference each
- // other's schema_type, thus creating an infinite loop.
- // 14. PropertyConfigProtos.joinable_config must be valid. See
+ // 12. PropertyConfigProtos.joinable_config must be valid. See
// ValidateJoinableConfig for more details.
- // 15. Any PropertyConfigProtos with nested DOCUMENT data type must not have
+ // 13. Any PropertyConfigProtos with nested DOCUMENT data type must not have
// REPEATED cardinality if they reference a schema type containing
// joinable property.
- //
- // TODO(b/171996137): Clarify 12 and 13 are only for indexed properties, once
- // document properties can be opted out of indexing.
+ // 14. The schema definition cannot have invalid cycles. A cycle is invalid
+ // if:
+ // a. SchemaTypeConfigProto.parent_types definitions form an inheritance
+ // cycle.
+ // b. The schema's property definitions have schema_types that form a
+ // cycle, and all properties on the cycle declare
+ // DocumentIndexingConfig.index_nested_properties=true.
+ // c. The schema's property definitions have schema_types that form a
+ // cycle, and the cycle leads to an invalid joinable property config.
+ // This is the case if:
+ // i. Any type node in the cycle itself has a joinable property
+ // (property whose joinable config is not NONE), OR
+ // ii. Any type node in the cycle has a nested-type (direct or
+ // indirect) with a joinable property.
//
// Returns:
// On success, a dependent map from each types to their dependent types
@@ -143,7 +164,28 @@ class SchemaUtil {
// ALREADY_EXISTS for case 1 and 2
// INVALID_ARGUMENT for 3-14
static libtextclassifier3::StatusOr<DependentMap> Validate(
- const SchemaProto& schema);
+ const SchemaProto& schema, bool allow_circular_schema_definitions);
+
+ // Builds a transitive inheritance map.
+ //
+ // Ex. Suppose we have a schema with four types A, B, C and D, and we have the
+ // following direct inheritance relation.
+ //
+ // A -> B (A is the parent type of B)
+ // B -> C (B is the parent type of C)
+ // C -> D (C is the parent type of D)
+ //
+ // Then, the transitive inheritance map for this schema would be:
+ //
+ // A -> B, C, D
+ // B -> C, D
+ // C -> D
+ //
+ // RETURNS:
+ // On success, a transitive inheritance map of all types in the schema.
+ // INVALID_ARGUMENT if the inheritance graph contains a cycle.
+ static libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap>
+ BuildTransitiveInheritanceGraph(const SchemaProto& schema);
// Creates a mapping of schema type -> schema type config proto. The
// type_config_map is cleared, and then each schema-type_config_proto pair is
@@ -270,6 +312,52 @@ class SchemaUtil {
PropertyConfigProto::DataType::Code data_type,
PropertyConfigProto::Cardinality::Code cardinality,
std::string_view schema_type, std::string_view property_name);
+
+ // Returns whether 'parent_type' is a direct or indirect parent of
+ // 'child_type'.
+ static bool IsParent(const SchemaUtil::InheritanceMap& inheritance_map,
+ std::string_view parent_type,
+ std::string_view child_type);
+
+ // Returns whether 'child_property_config' in a child type can override
+ // 'parent_property_config' in the parent type.
+ //
+ // Let's assign 'child_property_config' a type T1 and 'parent_property_config'
+ // a type T2 that captures information for their data_type, schema_type and
+ // cardinalities, so that 'child_property_config' can override
+ // 'parent_property_config' if and only if T1 <: T2, i.e. T1 is a subtype of
+ // T2.
+ //
+ // Below are the rules for inferring subtype relations.
+ // - T <: T for every type T.
+ // - If U extends T, then U <: T.
+ // - For every type T1, T2 and T3, if T1 <: T2 and T2 <: T3, then T1 <: T3.
+ // - Optional<T> <: Repeated<T> for every type T.
+ // - Required<T> <: Optional<T> for every type T.
+ // - If T1 <: T2, then
+ // - Required<T1> <: Required<T2>
+ // - Optional<T1> <: Optional<T2>
+ // - Repeated<T1> <: Repeated<T2>
+ //
+ // We assume the Closed World Assumption (CWA), i.e. if T1 <: T2 cannot be
+ // deduced from the above rules, then T1 is not a subtype of T2.
+ static bool IsInheritedPropertyCompatible(
+ const SchemaUtil::InheritanceMap& inheritance_map,
+ const PropertyConfigProto& child_property_config,
+ const PropertyConfigProto& parent_property_config);
+
+ // Verifies that every child type's property set has included all compatible
+ // properties from parent types, based on the following rule:
+ //
+ // - If a property "prop" of type T is in the parent, then the child type must
+ // also have "prop" that is of type U, such that U <: T, i.e. U is a subtype
+ // of T.
+ //
+ // RETURNS:
+ // Ok on validation success
+ // INVALID_ARGUMENT if an exception that violates the above validation rule
+ // is found.
+ static libtextclassifier3::Status ValidateInheritedProperties(
+ const SchemaProto& schema);
};
} // namespace lib
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
index df7a421..3ea855c 100644
--- a/icing/schema/schema-util_test.cc
+++ b/icing/schema/schema-util_test.cc
@@ -14,15 +14,12 @@
#include "icing/schema/schema-util.h"
-#include <cstdint>
-#include <string>
#include <string_view>
#include <unordered_set>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/proto/schema.pb.h"
-#include "icing/proto/term.pb.h"
#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
@@ -34,6 +31,8 @@ using portable_equals_proto::EqualsProto;
using ::testing::Eq;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
using ::testing::Pair;
using ::testing::Pointee;
using ::testing::SizeIs;
@@ -44,7 +43,9 @@ constexpr char kEmailType[] = "EmailMessage";
constexpr char kMessageType[] = "Text";
constexpr char kPersonType[] = "Person";
-TEST(SchemaUtilTest, DependentGraphAlphabeticalOrder) {
+class SchemaUtilTest : public ::testing::TestWithParam<bool> {};
+
+TEST_P(SchemaUtilTest, DependentGraphAlphabeticalOrder) {
// Create a schema with the following dependent relation:
// C
// / \
@@ -121,7 +122,7 @@ TEST(SchemaUtilTest, DependentGraphAlphabeticalOrder) {
.AddType(type_f)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
- SchemaUtil::Validate(schema));
+ SchemaUtil::Validate(schema, GetParam()));
EXPECT_THAT(d_map, testing::SizeIs(5));
EXPECT_THAT(
d_map["F"],
@@ -151,7 +152,7 @@ TEST(SchemaUtilTest, DependentGraphAlphabeticalOrder) {
EqualsProto(type_a.properties(0)))))));
}
-TEST(SchemaUtilTest, DependentGraphReverseAlphabeticalOrder) {
+TEST_P(SchemaUtilTest, DependentGraphReverseAlphabeticalOrder) {
// Create a schema with the following dependent relation:
// C
// / \
@@ -229,7 +230,7 @@ TEST(SchemaUtilTest, DependentGraphReverseAlphabeticalOrder) {
.AddType(type_a)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
- SchemaUtil::Validate(schema));
+ SchemaUtil::Validate(schema, GetParam()));
EXPECT_THAT(d_map, testing::SizeIs(5));
EXPECT_THAT(
d_map["F"],
@@ -259,7 +260,7 @@ TEST(SchemaUtilTest, DependentGraphReverseAlphabeticalOrder) {
EqualsProto(type_a.properties(0)))))));
}
-TEST(SchemaUtilTest, DependentGraphMixedOrder) {
+TEST_P(SchemaUtilTest, DependentGraphMixedOrder) {
// Create a schema with the following dependent relation:
// C
// / \
@@ -336,7 +337,7 @@ TEST(SchemaUtilTest, DependentGraphMixedOrder) {
.AddType(type_d)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
- SchemaUtil::Validate(schema));
+ SchemaUtil::Validate(schema, GetParam()));
EXPECT_THAT(d_map, testing::SizeIs(5));
EXPECT_THAT(
d_map["F"],
@@ -366,9 +367,9 @@ TEST(SchemaUtilTest, DependentGraphMixedOrder) {
EqualsProto(type_a.properties(0)))))));
}
-TEST(SchemaUtilTest, TopLevelCycle) {
- // Create a schema with the following dependent relation:
- // A - B - B - B - B....
+TEST_P(SchemaUtilTest, TopLevelCycleIndexableTrueInvalid) {
+ // Create a schema with the following nested-type relation:
+ // A - B - B - B - B.... where all edges declare index_nested_properties=true
SchemaTypeConfigProto type_a =
SchemaTypeConfigBuilder()
.SetType("A")
@@ -389,14 +390,57 @@ TEST(SchemaUtilTest, TopLevelCycle) {
.Build();
SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("Infinite loop")));
+ HasSubstr("Invalid cycle")));
}
-TEST(SchemaUtilTest, MultiLevelCycle) {
+TEST_P(SchemaUtilTest, TopLevelCycleIndexableFalseNotJoinableOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following nested-type relation and
+ // index_nested_properties definition:
+ // A -(true)-> B -(false)-> B -(false)-> B....
+ // Edge B -(false)-> B breaks the invalid cycle, so this is allowed.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(1));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))))),
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(0)))))));
+}
+
+TEST_P(SchemaUtilTest, MultiLevelCycleIndexableTrueInvalid) {
// Create a schema with the following dependent relation:
// A - B - C - A - B - C - A ...
+ // where all edges declare index_nested_properties=true
SchemaTypeConfigProto type_a =
SchemaTypeConfigBuilder()
.SetType("A")
@@ -427,11 +471,1222 @@ TEST(SchemaUtilTest, MultiLevelCycle) {
SchemaProto schema =
SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs((libtextclassifier3::StatusCode::INVALID_ARGUMENT),
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, MultiLevelCycleIndexableFalseNotJoinableOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following nested-type relation:
+ // A -(true)-> B -(false)-> C -(true)-> A -(true)-> B -(false)-> C ...
+ // B -(false)-> C breaks the invalid cycle, so this is allowed.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::OK));
+}
+
+TEST_P(SchemaUtilTest, MultiLevelCycleDependentMapOk) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following nested-type dependent relation:
+ // A -(false)-> B -(false)-> C -(false)-> A --> B --> C ...
+ // i.e. A is a property of B
+ // B is a property of C
+ // C is a property of A
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(
+ d_map["A"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0))))),
+ Pair("C", IsEmpty())));
+ EXPECT_THAT(
+ d_map["B"],
+ UnorderedElementsAre(Pair("A", IsEmpty()), Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(Pointee(
+ EqualsProto(type_c.properties(0)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", UnorderedElementsAre(Pointee(
+ EqualsProto(type_a.properties(0))))),
+ Pair("B", IsEmpty()), Pair("C", IsEmpty())));
+}
+
+TEST_P(SchemaUtilTest, NestedCycleIndexableTrueInvalid) {
+ // Create a schema with the following dependent relation:
+ // A -(false)-> B <-(true)-> C -(false)-> D.
+ // B <-(true)-> C creates an invalid cycle.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, NestedCycleIndexableFalseNotJoinableOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following nested-type relation:
+ // A -(true)-> B -(true)-> C -(false)-> B -(true)-> D.
+ // C -(false)-> B breaks the invalid cycle in B - C - B.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))))),
+ Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0))))),
+ Pair("C", IsEmpty())));
+ EXPECT_THAT(d_map["D"],
+ UnorderedElementsAre(
+ Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(1))))),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(1)))))));
+}
+
+TEST_P(SchemaUtilTest, MultiplePathsAnyPathContainsCycleIsInvalid) {
+ // Create a schema with the following nested-type relation:
+ // C -(false)-> B -(true)-> A
+ // ^ /
+ // (true)\ /(true)
+ // \ v
+ // D
+ // There is a cycle in B-A-D-B... so this is not allowed
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_d)
+ .AddType(type_c)
+ .AddType(type_b)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, MultipleCycles_anyCycleIndexableTrueInvalid) {
+ // Create a schema with the following nested-type dependent relation:
+ // Note that the arrows in this graph shows the direction of the dependent
+ // relation, rather than nested-type relations.
+ // A -(F)-> B
+ // ^ \ |
+ // (T)| (T)\ |(T)
+ // | v v
+ // D <-(T)- C
+ // There are two cycles: A-B-C-D and A-C-D. The first cycle is allowed because
+ // A-B has nested-indexable=false, but A-C-D is not: all of its edges are (T).
+ //
+ // Schema nested-type property relation graph:
+ // A <-- B
+ // | ^ ^
+ // v \ |
+ // D --> C
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_d)
+ .AddType(type_c)
+ .AddType(type_b)
+ .AddType(type_a)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, NonExistentType) {
+TEST_P(SchemaUtilTest, CycleWithSameTypedProps_allPropsIndexableFalseIsOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following nested-type relation and
+ // index_nested_properties definition:
+ // A <-(true)- B <-(false)- A -(false)-> B -(true)-> A
+ // A has 2 properties with type B. A - B breaks the invalid cycle only when
+ // both properties declare index_nested_properties=false.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b2")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("A")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(
+ d_map["A"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0)))))));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))),
+ Pointee(EqualsProto(type_a.properties(1))))),
+ Pair("B", IsEmpty())));
+}
+
+TEST_P(SchemaUtilTest, CycleWithSameTypedProps_anyPropIndexableTrueIsInvalid) {
+ // Create a schema with the following nested-type relation and
+ // index_nested_properties definition:
+ // A <-(true)- B <-(true)- A -(false)-> B -(true)-> A
+ // A has 2 properties with type B. Prop 'b1' declares
+ // index_nested_properties=true, so there is an invalid cycle.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b1")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b2")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("A")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, CycleWithJoinablePropertyNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // (joinable) B ---> C
+ // B also has a string property that is joinable on QUALIFIED_ID
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, NonNestedJoinablePropOutsideCycleOK) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following dependent relation:
+ // A -(false)-> B <-(false)-> C...
+ // A has a string property that is joinable on QUALIFIED_ID, but the cycle is
+ // B-C-B, and none of B or C depends on A, so this is fine.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ // Assert Validate status is OK and check dependent map
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))))),
+ Pair("B", IsEmpty()),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0)))))));
+ EXPECT_THAT(
+ d_map["C"],
+ UnorderedElementsAre(Pair("A", IsEmpty()),
+ Pair("B", UnorderedElementsAre(Pointee(
+ EqualsProto(type_b.properties(0))))),
+ Pair("C", IsEmpty())));
+}
+
+TEST_P(SchemaUtilTest, DirectNestedJoinablePropOutsideCycleNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // B ---> C ---> D(joinable)
+ // All edges have index_nested_properties=false and only D has a joinable
+ // property. The cycle A-B-C... is not allowed since there is a type in the
+ // cycle (C) which has a direct nested-type (D) with a joinable property.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, TransitiveNestedJoinablePropOutsideCycleNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // B ---> C ---> D ---> E (joinable)
+ // All edges have index_nested_properties=false and only E has a joinable
+ // property. The cycle A-B-C... is not allowed since there is a type in the
+ // cycle (C) which has a transitive nested-type (E) with a joinable property.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest,
+ NestedJoinablePropOutsideCycleNotAllowed_reverseIterationOrder) {
+ // Create a schema with the following dependent relation:
+ // E
+ // / ^
+ // v \
+ // D ---> C ---> B ---> A (joinable)
+ // All edges have index_nested_properties=false and only A has a joinable
+ // property. The cycle E-D-C... is not allowed since there is a type in the
+ // cycle (C) which has a transitive nested-type (A) with a joinable property.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, ComplexCycleWithJoinablePropertyNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // B ---> E
+ // / \ ^
+ // v v \
+ // C D --> F
+ //
+ // Cycles: A-B-E-A, A-B-D-F-E-A.
+ // All edges have index_nested_properties=false, but D has a joinable property
+ // so the second cycle is not allowed.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("f")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("F", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, ComplexCycleWithIndexableTrueNotAllowed) {
+ // Create a schema with the following dependent relation:
+ // A
+ // / ^
+ // v \
+ // B ---> E
+ // / \ ^
+ // v v \
+ // C D --> F
+ //
+ // Cycles: A-B-E-A, A-B-D-F-E-A.
+ // B->E has index_nested_properties=false, so the first cycle is allowed.
+ // All edges on the second cycle are nested_indexable, so the second cycle is
+ // not allowed
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/false))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("joinableProp")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("f")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("F", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, InheritanceAndNestedTypeRelations_noCycle) {
+ if (GetParam() != true) {
+ GTEST_SKIP() << "This is an invalid cycle if circular schema definitions "
+ "are not allowed.";
+ }
+
+ // Create a schema with the following relations:
+ // index_nested_properties definition:
+ // 1. Nested-type relations:
+ // A -(true)-> B -(true)-> C
+ // (false)| (false)/ \(false)
+ // B B C
+ // The properties in the second row are required for B and C to be
+ // compatible with their parents. index_nested_properties must be false in
+ // these properties so that no invalid cycle can be formed because of these
+ // self-references.
+ //
+ // 2. Inheritance relations:
+ // C -> B -> A (A is a parent of B, which is a parent of C)
+ //
+ // These two relations are separate and do not affect each other. In this
+ // case there is no cycle.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddParentType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddParentType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(3));
+ // Both A-B and A-C are inheritance relations.
+ EXPECT_THAT(d_map["A"],
+ UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty())));
+ // B-A and B-B are nested-type relations, B-C is both a nested-type and an
+ // inheritance relation.
+ EXPECT_THAT(d_map["B"],
+ UnorderedElementsAre(
+ Pair("A", UnorderedElementsAre(
+ Pointee(EqualsProto(type_a.properties(0))))),
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(0))))),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0)))))));
+ // C-C, C-B and C-A are all nested-type relations.
+ EXPECT_THAT(d_map["C"],
+ UnorderedElementsAre(
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(1))))),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(1))))),
+ Pair("A", IsEmpty())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(2));
+ EXPECT_THAT(i_map["A"],
+ UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsFalse())));
+ EXPECT_THAT(i_map["B"], UnorderedElementsAre(Pair("C", IsTrue())));
+}
+
+TEST_P(SchemaUtilTest, InheritanceAndNestedTypeRelations_nestedTypeCycle) {
+ // Create a schema with the following relations. In the diagram, each
+ // nested-type edge is labelled with its index_nested_properties value:
+ // 1. Nested-type relations:
+ //    A -(true)-> B -(true)-> C
+ //          (true)|   (false)/ \(false)
+ //                B         B   C
+ //
+ // 2. Inheritance relations:
+ //    C -> B -> A (A is a parent of B, which is a parent of C)
+ //
+ // These two relations are separate and do not affect each other, but B's "b"
+ // property nests B itself with index_nested_properties=true: a cycle B - B.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddParentType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddParentType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, InheritanceAndNestedTypeRelations_inheritanceCycle) {
+ // Create a schema with the following relations. In the diagram, each
+ // nested-type edge is labelled with its index_nested_properties value:
+ // 1. Nested-type relations:
+ //    A -(true)-> B -(true)-> C
+ //         (false)|   (false)/ \(false)
+ //                B         B   C
+ //
+ // 2. Inheritance relations:
+ //    C -> B -> A -> B (A is a parent of B, which is a parent of C and A)
+ //
+ // These two relations are separate and do not affect each other, but A and B
+ // name each other as parent types: an inheritance cycle B - A - B.
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddParentType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddParentType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddParentType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/false))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("prop")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("inherits from itself")));
+}
+
+TEST_P(SchemaUtilTest, NonExistentType) {
// Create a schema with the following dependent relation:
// A - B - C - X (does not exist)
SchemaTypeConfigProto type_a =
@@ -464,25 +1719,121 @@ TEST(SchemaUtilTest, NonExistentType) {
SchemaProto schema =
SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, SimpleInheritance) {
+TEST_P(SchemaUtilTest, SingleTypeIsBothDirectAndIndirectDependent) {
+ // Create a schema with the following dependent relations, all of which are
+ // via nested documents. In this case, C is both a direct dependent and an
+ // indirect dependent (through B) of A.
+ //    A
+ //    | \
+ //    |  B
+ //    | /
+ //    C
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(d_map["A"],
+ UnorderedElementsAre(
+ Pair("B", UnorderedElementsAre(
+ Pointee(EqualsProto(type_b.properties(0))))),
+ Pair("C", UnorderedElementsAre(
+ Pointee(EqualsProto(type_c.properties(0)))))));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
+ "C", UnorderedElementsAre(Pointee(
+ EqualsProto(type_c.properties(1)))))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, IsEmpty());
+}
+
+TEST_P(SchemaUtilTest, SimpleInheritance) {
// Create a schema with the following inheritance relation:
// A <- B
SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
SchemaTypeConfigProto type_b =
- SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
- SchemaUtil::Validate(schema));
+ SchemaUtil::Validate(schema, GetParam()));
EXPECT_THAT(d_map, SizeIs(1));
EXPECT_THAT(d_map["A"], UnorderedElementsAre(Pair("B", IsEmpty())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(1));
+ EXPECT_THAT(i_map["A"], UnorderedElementsAre(Pair("B", IsTrue())));
+}
+
+TEST_P(SchemaUtilTest, SingleInheritanceTypeIsBothDirectAndIndirectChild) {
+ // Create a schema with the following inheritance relations. In this case, C
+ // is both a direct child of A and an indirect child of A (through B).
+ //    A
+ //    | \
+ //    |  B
+ //    | /
+ //    C
+ SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+ SchemaTypeConfigProto type_c = SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddParentType("A")
+ .AddParentType("B")
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
+ SchemaUtil::Validate(schema, GetParam()));
+ EXPECT_THAT(d_map, SizeIs(2));
+ EXPECT_THAT(d_map["A"],
+ UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty())));
+ EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair("C", IsEmpty())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(2));
+ EXPECT_THAT(i_map["A"],
+ UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsTrue())));
+ EXPECT_THAT(i_map["B"], UnorderedElementsAre(Pair("C", IsTrue())));
}
-TEST(SchemaUtilTest, ComplexInheritance) {
+TEST_P(SchemaUtilTest, ComplexInheritance) {
// Create a schema with the following inheritance relation:
// A
// / \
@@ -493,15 +1844,15 @@ TEST(SchemaUtilTest, ComplexInheritance) {
// F
SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
SchemaTypeConfigProto type_b =
- SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
SchemaTypeConfigProto type_c =
- SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build();
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
SchemaTypeConfigProto type_d =
- SchemaTypeConfigBuilder().SetType("D").SetParentType("B").Build();
+ SchemaTypeConfigBuilder().SetType("D").AddParentType("B").Build();
SchemaTypeConfigProto type_e =
- SchemaTypeConfigBuilder().SetType("E").SetParentType("A").Build();
+ SchemaTypeConfigBuilder().SetType("E").AddParentType("A").Build();
SchemaTypeConfigProto type_f =
- SchemaTypeConfigBuilder().SetType("F").SetParentType("D").Build();
+ SchemaTypeConfigBuilder().SetType("F").AddParentType("D").Build();
SchemaProto schema = SchemaBuilder()
.AddType(type_a)
@@ -512,7 +1863,7 @@ TEST(SchemaUtilTest, ComplexInheritance) {
.AddType(type_f)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
- SchemaUtil::Validate(schema));
+ SchemaUtil::Validate(schema, GetParam()));
EXPECT_THAT(d_map, SizeIs(3));
EXPECT_THAT(d_map["A"],
UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()),
@@ -522,56 +1873,69 @@ TEST(SchemaUtilTest, ComplexInheritance) {
UnorderedElementsAre(Pair("C", IsEmpty()), Pair("D", IsEmpty()),
Pair("F", IsEmpty())));
EXPECT_THAT(d_map["D"], UnorderedElementsAre(Pair("F", IsEmpty())));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(3));
+ EXPECT_THAT(i_map["A"],
+ UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsFalse()),
+ Pair("D", IsFalse()), Pair("E", IsTrue()),
+ Pair("F", IsFalse())));
+ EXPECT_THAT(i_map["B"],
+ UnorderedElementsAre(Pair("C", IsTrue()), Pair("D", IsTrue()),
+ Pair("F", IsFalse())));
+ EXPECT_THAT(i_map["D"], UnorderedElementsAre(Pair("F", IsTrue())));
}
-TEST(SchemaUtilTest, InheritanceCycle) {
+TEST_P(SchemaUtilTest, InheritanceCycle) {
// Create a schema with the following inheritance relation:
// C <- A <- B <- C
SchemaTypeConfigProto type_a =
- SchemaTypeConfigBuilder().SetType("A").SetParentType("C").Build();
+ SchemaTypeConfigBuilder().SetType("A").AddParentType("C").Build();
SchemaTypeConfigProto type_b =
- SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
SchemaTypeConfigProto type_c =
- SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build();
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
SchemaProto schema =
SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, SelfInheritance) {
+TEST_P(SchemaUtilTest, SelfInheritance) {
SchemaTypeConfigProto type_a =
- SchemaTypeConfigBuilder().SetType("A").SetParentType("A").Build();
+ SchemaTypeConfigBuilder().SetType("A").AddParentType("A").Build();
SchemaProto schema = SchemaBuilder().AddType(type_a).Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, NonExistentParentType) {
+TEST_P(SchemaUtilTest, NonExistentParentType) {
// Create a schema with the following inheritance relation:
// (does not exist) X <- A <- B <- C
SchemaTypeConfigProto type_a =
- SchemaTypeConfigBuilder().SetType("A").SetParentType("X").Build();
+ SchemaTypeConfigBuilder().SetType("A").AddParentType("X").Build();
SchemaTypeConfigProto type_b =
- SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
SchemaTypeConfigProto type_c =
- SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build();
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
SchemaProto schema =
SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, SimpleInheritanceWithNestedType) {
+TEST_P(SchemaUtilTest, SimpleInheritanceWithNestedType) {
// Create a schema with the following dependent relation:
// A - B (via inheritance)
// B - C (via nested document)
SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
SchemaTypeConfigProto type_b =
- SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
SchemaTypeConfigProto type_c =
SchemaTypeConfigBuilder()
.SetType("C")
@@ -585,16 +1949,22 @@ TEST(SchemaUtilTest, SimpleInheritanceWithNestedType) {
SchemaProto schema =
SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
- SchemaUtil::Validate(schema));
+ SchemaUtil::Validate(schema, GetParam()));
EXPECT_THAT(d_map, SizeIs(2));
- EXPECT_THAT(d_map["A"],
- UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty())));
+ // Nested-type and inheritance edges do not chain: C is not A's dependent.
+ EXPECT_THAT(d_map["A"], UnorderedElementsAre(Pair("B", IsEmpty())));
EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair(
"C", UnorderedElementsAre(Pointee(
EqualsProto(type_c.properties(0)))))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(1));
+ EXPECT_THAT(i_map["A"], UnorderedElementsAre(Pair("B", IsTrue())));
}
-TEST(SchemaUtilTest, ComplexInheritanceWithNestedType) {
+TEST_P(SchemaUtilTest, ComplexInheritanceWithNestedType) {
// Create a schema with the following dependent relation:
// A
// / \
@@ -611,9 +1981,9 @@ TEST(SchemaUtilTest, ComplexInheritanceWithNestedType) {
// F has a nested document of type D
SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build();
SchemaTypeConfigProto type_b =
- SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
SchemaTypeConfigProto type_c =
- SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build();
+ SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build();
SchemaTypeConfigProto type_d =
SchemaTypeConfigBuilder()
.SetType("D")
@@ -651,15 +2021,13 @@ TEST(SchemaUtilTest, ComplexInheritanceWithNestedType) {
.AddType(type_f)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map,
- SchemaUtil::Validate(schema));
+ SchemaUtil::Validate(schema, GetParam()));
EXPECT_THAT(d_map, SizeIs(3));
EXPECT_THAT(
d_map["A"],
- UnorderedElementsAre(
- Pair("B", IsEmpty()), Pair("C", IsEmpty()), Pair("D", IsEmpty()),
- Pair("E", UnorderedElementsAre(
- Pointee(EqualsProto(type_e.properties(0))))),
- Pair("F", IsEmpty())));
+ UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()),
+ Pair("E", UnorderedElementsAre(Pointee(
+ EqualsProto(type_e.properties(0)))))));
EXPECT_THAT(
d_map["B"],
UnorderedElementsAre(Pair("C", IsEmpty()),
@@ -669,9 +2037,17 @@ TEST(SchemaUtilTest, ComplexInheritanceWithNestedType) {
EXPECT_THAT(d_map["D"], UnorderedElementsAre(Pair(
"F", UnorderedElementsAre(Pointee(
EqualsProto(type_f.properties(0)))))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaUtil::InheritanceMap i_map,
+ SchemaUtil::BuildTransitiveInheritanceGraph(schema));
+ EXPECT_THAT(i_map, SizeIs(2));
+ EXPECT_THAT(i_map["A"],
+ UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsFalse())));
+ EXPECT_THAT(i_map["B"], UnorderedElementsAre(Pair("C", IsTrue())));
}
-TEST(SchemaUtilTest, InheritanceWithNestedTypeCycle) {
+TEST_P(SchemaUtilTest, InheritanceWithNestedTypeCycle) {
// Create a schema that A and B depend on each other, in the sense that B
// extends A but A has a nested document of type B.
SchemaTypeConfigProto type_a =
@@ -684,19 +2060,19 @@ TEST(SchemaUtilTest, InheritanceWithNestedTypeCycle) {
.SetDataTypeDocument("B", /*index_nested_properties=*/true))
.Build();
SchemaTypeConfigProto type_b =
- SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build();
+ SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, EmptySchemaProtoIsValid) {
+TEST_P(SchemaUtilTest, EmptySchemaProtoIsValid) {
SchemaProto schema;
- ICING_ASSERT_OK(SchemaUtil::Validate(schema));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
}
-TEST(SchemaUtilTest, Valid_Nested) {
+TEST_P(SchemaUtilTest, Valid_Nested) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -719,43 +2095,43 @@ TEST(SchemaUtilTest, Valid_Nested) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ICING_ASSERT_OK(SchemaUtil::Validate(schema));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
}
-TEST(SchemaUtilTest, ClearedPropertyConfigsIsValid) {
+TEST_P(SchemaUtilTest, ClearedPropertyConfigsIsValid) {
// No property fields is technically ok, but probably not realistic.
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType(kEmailType))
.Build();
- ICING_ASSERT_OK(SchemaUtil::Validate(schema));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
}
-TEST(SchemaUtilTest, ClearedSchemaTypeIsInvalid) {
+TEST_P(SchemaUtilTest, ClearedSchemaTypeIsInvalid) {
SchemaProto schema =
SchemaBuilder().AddType(SchemaTypeConfigBuilder()).Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, EmptySchemaTypeIsInvalid) {
+TEST_P(SchemaUtilTest, EmptySchemaTypeIsInvalid) {
SchemaProto schema =
SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("")).Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, AnySchemaTypeOk) {
+TEST_P(SchemaUtilTest, AnySchemaTypeOk) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType(
"abc123!@#$%^&*()_-+=[{]}|\\;:'\",<.>?你好"))
.Build();
- ICING_ASSERT_OK(SchemaUtil::Validate(schema));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
}
-TEST(SchemaUtilTest, ClearedPropertyNameIsInvalid) {
+TEST_P(SchemaUtilTest, ClearedPropertyNameIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -766,11 +2142,11 @@ TEST(SchemaUtilTest, ClearedPropertyNameIsInvalid) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
schema.mutable_types(0)->mutable_properties(0)->clear_property_name();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, EmptyPropertyNameIsInvalid) {
+TEST_P(SchemaUtilTest, EmptyPropertyNameIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -781,11 +2157,11 @@ TEST(SchemaUtilTest, EmptyPropertyNameIsInvalid) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) {
+TEST_P(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -796,11 +2172,11 @@ TEST(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, AlphanumericPropertyNameOk) {
+TEST_P(SchemaUtilTest, AlphanumericPropertyNameOk) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -811,10 +2187,10 @@ TEST(SchemaUtilTest, AlphanumericPropertyNameOk) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ICING_ASSERT_OK(SchemaUtil::Validate(schema));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam()));
}
-TEST(SchemaUtilTest, DuplicatePropertyNameIsInvalid) {
+TEST_P(SchemaUtilTest, DuplicatePropertyNameIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -828,11 +2204,11 @@ TEST(SchemaUtilTest, DuplicatePropertyNameIsInvalid) {
.SetDataType(TYPE_STRING)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::ALREADY_EXISTS));
}
-TEST(SchemaUtilTest, ClearedDataTypeIsInvalid) {
+TEST_P(SchemaUtilTest, ClearedDataTypeIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -843,11 +2219,11 @@ TEST(SchemaUtilTest, ClearedDataTypeIsInvalid) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
schema.mutable_types(0)->mutable_properties(0)->clear_data_type();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, UnknownDataTypeIsInvalid) {
+TEST_P(SchemaUtilTest, UnknownDataTypeIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(
@@ -859,11 +2235,11 @@ TEST(SchemaUtilTest, UnknownDataTypeIsInvalid) {
.SetDataType(PropertyConfigProto::DataType::UNKNOWN)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, ClearedCardinalityIsInvalid) {
+TEST_P(SchemaUtilTest, ClearedCardinalityIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -874,11 +2250,11 @@ TEST(SchemaUtilTest, ClearedCardinalityIsInvalid) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
schema.mutable_types(0)->mutable_properties(0)->clear_cardinality();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, UnknownCardinalityIsInvalid) {
+TEST_P(SchemaUtilTest, UnknownCardinalityIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -888,11 +2264,11 @@ TEST(SchemaUtilTest, UnknownCardinalityIsInvalid) {
.SetDataType(TYPE_STRING)
.SetCardinality(CARDINALITY_UNKNOWN)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) {
+TEST_P(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -902,11 +2278,11 @@ TEST(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) {
.SetDataType(TYPE_DOCUMENT)
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, Invalid_EmptyPropertySchemaType) {
+TEST_P(SchemaUtilTest, Invalid_EmptyPropertySchemaType) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -919,11 +2295,11 @@ TEST(SchemaUtilTest, Invalid_EmptyPropertySchemaType) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) {
+TEST_P(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -936,12 +2312,12 @@ TEST(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ASSERT_THAT(SchemaUtil::Validate(schema),
+ ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
HasSubstr("Undefined 'schema_type'")));
}
-TEST(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
+TEST_P(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -977,7 +2353,7 @@ TEST(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
Eq(schema_delta));
}
-TEST(SchemaUtilTest, NewRequiredPropertyIsIncompatible) {
+TEST_P(SchemaUtilTest, NewRequiredPropertyIsIncompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -1013,7 +2389,7 @@ TEST(SchemaUtilTest, NewRequiredPropertyIsIncompatible) {
Eq(schema_delta));
}
-TEST(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) {
+TEST_P(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -1049,7 +2425,7 @@ TEST(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) {
Eq(schema_delta));
}
-TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
+TEST_P(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
// Configure less restrictive schema based on cardinality
SchemaProto less_restrictive_schema =
SchemaBuilder()
@@ -1091,7 +2467,7 @@ TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
Eq(compatible_schema_delta));
}
-TEST(SchemaUtilTest, DifferentDataTypeIsIncompatible) {
+TEST_P(SchemaUtilTest, DifferentDataTypeIsIncompatible) {
// Configure old schema, with an int64_t property
SchemaProto old_schema =
SchemaBuilder()
@@ -1122,7 +2498,7 @@ TEST(SchemaUtilTest, DifferentDataTypeIsIncompatible) {
Eq(schema_delta));
}
-TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
+TEST_P(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
// Configure old schema, where Property is supposed to be a Person type
SchemaProto old_schema =
SchemaBuilder()
@@ -1186,7 +2562,7 @@ TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
EXPECT_THAT(actual.schema_types_deleted, testing::IsEmpty());
}
-TEST(SchemaUtilTest, ChangingIndexedStringPropertiesMakesIndexIncompatible) {
+TEST_P(SchemaUtilTest, ChangingIndexedStringPropertiesMakesIndexIncompatible) {
// Configure old schema
SchemaProto schema_with_indexed_property =
SchemaBuilder()
@@ -1228,7 +2604,7 @@ TEST(SchemaUtilTest, ChangingIndexedStringPropertiesMakesIndexIncompatible) {
Eq(schema_delta));
}
-TEST(SchemaUtilTest, AddingNewIndexedStringPropertyMakesIndexIncompatible) {
+TEST_P(SchemaUtilTest, AddingNewIndexedStringPropertyMakesIndexIncompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -1266,8 +2642,8 @@ TEST(SchemaUtilTest, AddingNewIndexedStringPropertyMakesIndexIncompatible) {
Eq(schema_delta));
}
-TEST(SchemaUtilTest,
- AddingNewNonIndexedStringPropertyShouldRemainIndexCompatible) {
+TEST_P(SchemaUtilTest,
+ AddingNewNonIndexedStringPropertyShouldRemainIndexCompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -1304,7 +2680,7 @@ TEST(SchemaUtilTest,
IsEmpty());
}
-TEST(SchemaUtilTest, ChangingIndexedIntegerPropertiesMakesIndexIncompatible) {
+TEST_P(SchemaUtilTest, ChangingIndexedIntegerPropertiesMakesIndexIncompatible) {
// Configure old schema
SchemaProto schema_with_indexed_property =
SchemaBuilder()
@@ -1344,7 +2720,7 @@ TEST(SchemaUtilTest, ChangingIndexedIntegerPropertiesMakesIndexIncompatible) {
Eq(schema_delta));
}
-TEST(SchemaUtilTest, AddingNewIndexedIntegerPropertyMakesIndexIncompatible) {
+TEST_P(SchemaUtilTest, AddingNewIndexedIntegerPropertyMakesIndexIncompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -1379,8 +2755,8 @@ TEST(SchemaUtilTest, AddingNewIndexedIntegerPropertyMakesIndexIncompatible) {
Eq(schema_delta));
}
-TEST(SchemaUtilTest,
- AddingNewNonIndexedIntegerPropertyShouldRemainIndexCompatible) {
+TEST_P(SchemaUtilTest,
+ AddingNewNonIndexedIntegerPropertyShouldRemainIndexCompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -1414,7 +2790,7 @@ TEST(SchemaUtilTest,
IsEmpty());
}
-TEST(SchemaUtilTest, ChangingJoinablePropertiesMakesJoinIncompatible) {
+TEST_P(SchemaUtilTest, ChangingJoinablePropertiesMakesJoinIncompatible) {
// Configure old schema
SchemaProto schema_with_joinable_property =
SchemaBuilder()
@@ -1456,7 +2832,7 @@ TEST(SchemaUtilTest, ChangingJoinablePropertiesMakesJoinIncompatible) {
Eq(expected_schema_delta));
}
-TEST(SchemaUtilTest, AddingNewJoinablePropertyMakesJoinIncompatible) {
+TEST_P(SchemaUtilTest, AddingNewJoinablePropertyMakesJoinIncompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -1494,7 +2870,7 @@ TEST(SchemaUtilTest, AddingNewJoinablePropertyMakesJoinIncompatible) {
Eq(expected_schema_delta));
}
-TEST(SchemaUtilTest, AddingNewNonJoinablePropertyShouldRemainJoinCompatible) {
+TEST_P(SchemaUtilTest, AddingNewNonJoinablePropertyShouldRemainJoinCompatible) {
// Configure old schema
SchemaProto old_schema =
SchemaBuilder()
@@ -1531,7 +2907,7 @@ TEST(SchemaUtilTest, AddingNewNonJoinablePropertyShouldRemainJoinCompatible) {
IsEmpty());
}
-TEST(SchemaUtilTest, AddingTypeIsCompatible) {
+TEST_P(SchemaUtilTest, AddingTypeIsCompatible) {
// Can add a new type, existing data isn't incompatible, since none of them
// are of this new schema type
SchemaProto old_schema =
@@ -1571,7 +2947,7 @@ TEST(SchemaUtilTest, AddingTypeIsCompatible) {
Eq(schema_delta));
}
-TEST(SchemaUtilTest, DeletingTypeIsNoted) {
+TEST_P(SchemaUtilTest, DeletingTypeIsNoted) {
// Can't remove an old type, new schema needs to at least have all the
// previously defined schema otherwise the Documents of the missing schema
// are invalid
@@ -1612,7 +2988,7 @@ TEST(SchemaUtilTest, DeletingTypeIsNoted) {
Eq(schema_delta));
}
-TEST(SchemaUtilTest, DeletingPropertyAndChangingProperty) {
+TEST_P(SchemaUtilTest, DeletingPropertyAndChangingProperty) {
SchemaProto old_schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -1650,7 +3026,7 @@ TEST(SchemaUtilTest, DeletingPropertyAndChangingProperty) {
EXPECT_THAT(actual, Eq(schema_delta));
}
-TEST(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) {
+TEST_P(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) {
// Make two schemas. One that sets index_nested_properties to false and one
// that sets it to true.
SchemaTypeConfigProto email_type_config =
@@ -1705,7 +3081,7 @@ TEST(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) {
EXPECT_THAT(actual, Eq(schema_delta));
}
-TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
+TEST_P(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
@@ -1716,7 +3092,7 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
.Build();
// Error if we don't set a term match type
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Passes once we set a term match type
@@ -1727,10 +3103,10 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
}
-TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
+TEST_P(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
@@ -1741,7 +3117,7 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
.Build();
// Error if we don't set a tokenizer type
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Passes once we set a tokenizer type
@@ -1752,11 +3128,11 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
.SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
}
-TEST(SchemaUtilTest,
- ValidateJoinablePropertyTypeQualifiedIdShouldHaveStringDataType) {
+TEST_P(SchemaUtilTest,
+ ValidateJoinablePropertyTypeQualifiedIdShouldHaveStringDataType) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
@@ -1769,7 +3145,7 @@ TEST(SchemaUtilTest,
.Build();
// Error if data type is not STRING for qualified id joinable value type.
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Passes once we set STRING as the data type.
@@ -1782,10 +3158,11 @@ TEST(SchemaUtilTest,
/*propagate_delete=*/false)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
}
-TEST(SchemaUtilTest, ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) {
+TEST_P(SchemaUtilTest,
+ ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
@@ -1798,7 +3175,7 @@ TEST(SchemaUtilTest, ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) {
.Build();
// Error if using REPEATED cardinality for joinable property.
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Passes once we use OPTIONAL cardinality with joinable property.
@@ -1811,7 +3188,7 @@ TEST(SchemaUtilTest, ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) {
/*propagate_delete=*/false)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
// Passes once we use REQUIRED cardinality with joinable property.
schema = SchemaBuilder()
@@ -1823,7 +3200,7 @@ TEST(SchemaUtilTest, ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) {
/*propagate_delete=*/false)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
// Passes once we use REPEATED cardinality with non-joinable property.
schema = SchemaBuilder()
@@ -1835,11 +3212,11 @@ TEST(SchemaUtilTest, ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) {
/*propagate_delete=*/false)
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
}
-TEST(SchemaUtilTest,
- ValidateJoinablePropertyWithDeletePropagationShouldHaveTypeQualifiedId) {
+TEST_P(SchemaUtilTest,
+ ValidateJoinablePropertyWithDeletePropagationShouldHaveTypeQualifiedId) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
@@ -1853,7 +3230,7 @@ TEST(SchemaUtilTest,
// Error if enabling delete propagation with non qualified id joinable value
// type.
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Passes once we set qualified id joinable value type with delete propagation
@@ -1867,7 +3244,7 @@ TEST(SchemaUtilTest,
/*propagate_delete=*/true)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
// Passes once we disable delete propagation.
schema = SchemaBuilder()
@@ -1879,11 +3256,11 @@ TEST(SchemaUtilTest,
/*propagate_delete=*/false)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
}
-TEST(SchemaUtilTest,
- ValidateNestedJoinablePropertyShouldNotHaveNestedRepeatedCardinality) {
+TEST_P(SchemaUtilTest,
+ ValidateNestedJoinablePropertyShouldNotHaveNestedRepeatedCardinality) {
// Dependency and nested document property cardinality:
// "C" --(REPEATED)--> "B" --(OPTIONAL)--> "A"
// where "A" contains joinable property. This should not be allowed.
@@ -1909,7 +3286,7 @@ TEST(SchemaUtilTest,
/*index_nested_properties=*/false)
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Passes once we use non-REPEATED cardinality for "C.b", i.e. the dependency
@@ -1936,10 +3313,10 @@ TEST(SchemaUtilTest,
/*index_nested_properties=*/false)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
}
-TEST(
+TEST_P(
SchemaUtilTest,
ValidateNestedJoinablePropertyShouldAllowRepeatedCardinalityIfNoJoinableProperty) {
// Dependency and nested document property cardinality:
@@ -1979,11 +3356,11 @@ TEST(
// Passes since nested schema type with REPEATED cardinality doesn't have
// joinable property.
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
}
-TEST(SchemaUtilTest,
- ValidateNestedJoinablePropertyMultiplePropertiesWithSameSchema) {
+TEST_P(SchemaUtilTest,
+ ValidateNestedJoinablePropertyMultiplePropertiesWithSameSchema) {
// Dependency and nested document property cardinality:
// --(a1: OPTIONAL)--
// / \
@@ -2015,7 +3392,7 @@ TEST(SchemaUtilTest,
/*index_nested_properties=*/false)
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Passes once we use non-REPEATED cardinality for "B.a2", i.e. the dependency
@@ -2049,10 +3426,10 @@ TEST(SchemaUtilTest,
/*index_nested_properties=*/false)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
}
-TEST(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) {
+TEST_P(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) {
// Dependency and nested document property cardinality:
// B
// / \
@@ -2100,7 +3477,7 @@ TEST(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) {
/*index_nested_properties=*/false)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
// Fails once we change any of edge to REPEATED cardinality.
// B
@@ -2148,7 +3525,7 @@ TEST(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) {
/*index_nested_properties=*/false)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// B
@@ -2196,7 +3573,7 @@ TEST(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) {
/*index_nested_properties=*/false)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// B
@@ -2244,7 +3621,7 @@ TEST(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) {
/*index_nested_properties=*/false)
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// B
@@ -2292,11 +3669,11 @@ TEST(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) {
/*index_nested_properties=*/false)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) {
+TEST_P(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("InnerSchema"))
@@ -2316,10 +3693,10 @@ TEST(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) {
.SetCardinality(CARDINALITY_REPEATED)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk());
}
-TEST(SchemaUtilTest, InvalidSelfReference) {
+TEST_P(SchemaUtilTest, InvalidSelfReference) {
// Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema
SchemaProto schema =
SchemaBuilder()
@@ -2333,12 +3710,12 @@ TEST(SchemaUtilTest, InvalidSelfReference) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("Infinite loop")));
+ HasSubstr("Invalid cycle")));
}
-TEST(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) {
+TEST_P(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) {
// Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema
SchemaProto schema =
SchemaBuilder()
@@ -2357,12 +3734,12 @@ TEST(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("Infinite loop")));
+ HasSubstr("Invalid cycle")));
}
-TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
+TEST_P(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
// Create a schema for the outer schema
SchemaProto schema =
SchemaBuilder()
@@ -2379,7 +3756,7 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
.AddType(
SchemaTypeConfigBuilder()
.SetType("B")
- // Reference the schema A, causing an infinite loop of
+ // Reference the schema A, causing an invalid cycle of
// references.
.AddProperty(PropertyConfigBuilder()
.SetName("NestedDocument")
@@ -2389,12 +3766,12 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
.Build();
// Two degrees of referencing: A -> B -> A
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("Infinite loop")));
+ HasSubstr("Invalid cycle")));
}
-TEST(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
+TEST_P(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
SchemaProto schema =
SchemaBuilder()
// Create a schema for the outer schema
@@ -2430,11 +3807,445 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
.Build();
// Three degrees of referencing: A -> B -> C -> A
- EXPECT_THAT(SchemaUtil::Validate(schema),
+ EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("Infinite loop")));
+ HasSubstr("Invalid cycle")));
+}
+
+TEST_P(SchemaUtilTest, ChildMissingOptionalAndRepeatedPropertiesNotOk) {
+  // Child type "B" inherits from "A" but declares no properties of its own,
+  // so whatever "A" defines is missing from "B". The schema is expected to be
+  // rejected both when the parent's property is OPTIONAL and when it is
+  // REPEATED (the REQUIRED case has its own test).
+  SchemaTypeConfigProto type_b =
+      SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build();
+
+  // Case 1: the parent's property is OPTIONAL.
+  SchemaTypeConfigProto type_a_optional =
+      SchemaTypeConfigBuilder()
+          .SetType("A")
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("text")
+                  .SetCardinality(CARDINALITY_OPTIONAL)
+                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+          .Build();
+  SchemaProto schema_optional =
+      SchemaBuilder().AddType(type_a_optional).AddType(type_b).Build();
+  EXPECT_THAT(
+      SchemaUtil::Validate(schema_optional, GetParam()),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+               HasSubstr("Property text is not present in child type")));
+
+  // Case 2: the parent's property is REPEATED.
+  SchemaTypeConfigProto type_a_repeated =
+      SchemaTypeConfigBuilder()
+          .SetType("A")
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("text")
+                  .SetCardinality(CARDINALITY_REPEATED)
+                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+          .Build();
+  SchemaProto schema_repeated =
+      SchemaBuilder().AddType(type_a_repeated).AddType(type_b).Build();
+  EXPECT_THAT(
+      SchemaUtil::Validate(schema_repeated, GetParam()),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+               HasSubstr("Property text is not present in child type")));
}
+TEST_P(SchemaUtilTest, ChildMissingRequiredPropertyNotOk) {
+  // "A" defines a REQUIRED string property "text"; "B" names "A" as its
+  // parent but declares no properties, so "text" is absent from the child.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("text")
+                  .SetCardinality(CARDINALITY_REQUIRED)
+                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)))
+          .AddType(SchemaTypeConfigBuilder().SetType("B").AddParentType("A"))
+          .Build();
+
+  // Validation must reject the schema and name the missing property.
+  EXPECT_THAT(
+      SchemaUtil::Validate(schema, GetParam()),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+               HasSubstr("Property text is not present in child type")));
+}
+
+TEST_P(SchemaUtilTest, ChildCompatiblePropertyOk) {
+  // Parent type: a REPEATED string "text" and an OPTIONAL nested "person"
+  // document of type "Person".
+  SchemaTypeConfigProto parent_message =
+      SchemaTypeConfigBuilder()
+          .SetType("Message")
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("text")
+                  .SetCardinality(CARDINALITY_REPEATED)
+                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("person")
+                           .SetCardinality(CARDINALITY_OPTIONAL)
+                           .SetDataTypeDocument(
+                               "Person", /*index_nested_properties=*/true))
+          .Build();
+
+  // Child type: redeclares both parent properties in a narrower form and adds
+  // two properties of its own. The property order matters because the
+  // expectations below refer to properties by index.
+  SchemaTypeConfigProto child_message =
+      SchemaTypeConfigBuilder()
+          .SetType("ArtistMessage")
+          .AddParentType("Message")
+          // [0] "text": OPTIONAL narrows the parent's REPEATED.
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("text")
+                  .SetCardinality(CARDINALITY_OPTIONAL)
+                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+          // [1] "extraText": a string property the parent does not have.
+          .AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("extraText")
+                  .SetCardinality(CARDINALITY_REPEATED)
+                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN))
+          // [2] "extraDouble": a double property the parent does not have.
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("extraDouble")
+                           .SetCardinality(CARDINALITY_OPTIONAL)
+                           .SetDataType(TYPE_DOUBLE))
+          // [3] "person": REQUIRED narrows the parent's OPTIONAL, and the
+          // document type "Artist" is a subtype of the parent's "Person".
+          .AddProperty(PropertyConfigBuilder()
+                           .SetName("person")
+                           .SetCardinality(CARDINALITY_REQUIRED)
+                           .SetDataTypeDocument(
+                               "Artist", /*index_nested_properties=*/true))
+          .Build();
+
+  // "Person" has no properties; "Artist" inherits from "Person".
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(parent_message)
+          .AddType(child_message)
+          .AddType(SchemaTypeConfigBuilder().SetType("Person"))
+          .AddType(
+              SchemaTypeConfigBuilder().SetType("Artist").AddParentType(
+                  "Person"))
+          .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap dependents,
+                             SchemaUtil::Validate(schema, GetParam()));
+  EXPECT_THAT(dependents, SizeIs(3));
+
+  // "Message" is depended on by "ArtistMessage" with no associated property.
+  EXPECT_THAT(dependents["Message"],
+              UnorderedElementsAre(Pair("ArtistMessage", IsEmpty())));
+
+  // "Person" is depended on by "Message" (through its "person" property) and
+  // by "Artist" (with no associated property).
+  EXPECT_THAT(dependents["Person"],
+              UnorderedElementsAre(
+                  Pair("Message", UnorderedElementsAre(Pointee(EqualsProto(
+                                      parent_message.properties(1))))),
+                  Pair("Artist", IsEmpty())));
+
+  // "Artist" is depended on by "ArtistMessage" through its "person" property.
+  EXPECT_THAT(dependents["Artist"],
+              UnorderedElementsAre(Pair(
+                  "ArtistMessage", UnorderedElementsAre(Pointee(EqualsProto(
+                                       child_message.properties(3)))))));
+}
+
+TEST_P(SchemaUtilTest, ChildIncompatibleCardinalityPropertyNotOk) {
+  // "ArtistMessage" inherits from "Message" but redeclares the parent's
+  // OPTIONAL "person" property as REPEATED, which the test expects to be
+  // rejected as incompatible.
+  SchemaProto schema =
+      SchemaBuilder()
+          // Parent type.
+          .AddType(
+              SchemaTypeConfigBuilder()
+                  .SetType("Message")
+                  .AddProperty(PropertyConfigBuilder()
+                                   .SetName("text")
+                                   .SetCardinality(CARDINALITY_REPEATED)
+                                   .SetDataTypeString(TERM_MATCH_EXACT,
+                                                      TOKENIZER_PLAIN))
+                  .AddProperty(
+                      PropertyConfigBuilder()
+                          .SetName("person")
+                          .SetCardinality(CARDINALITY_OPTIONAL)
+                          .SetDataTypeDocument(
+                              "Person", /*index_nested_properties=*/true)))
+          // Child type with the offending "person" cardinality.
+          .AddType(
+              SchemaTypeConfigBuilder()
+                  .SetType("ArtistMessage")
+                  .AddParentType("Message")
+                  .AddProperty(PropertyConfigBuilder()
+                                   .SetName("text")
+                                   .SetCardinality(CARDINALITY_OPTIONAL)
+                                   .SetDataTypeString(TERM_MATCH_EXACT,
+                                                      TOKENIZER_PLAIN))
+                  .AddProperty(PropertyConfigBuilder()
+                                   .SetName("extraText")
+                                   .SetCardinality(CARDINALITY_REPEATED)
+                                   .SetDataTypeString(TERM_MATCH_EXACT,
+                                                      TOKENIZER_PLAIN))
+                  .AddProperty(
+                      PropertyConfigBuilder()
+                          .SetName("person")
+                          // Parent says OPTIONAL; REPEATED here is not ok.
+                          .SetCardinality(CARDINALITY_REPEATED)
+                          .SetDataTypeDocument(
+                              "Artist", /*index_nested_properties=*/true)))
+          // Nested document types: "Artist" is a subtype of "Person".
+          .AddType(SchemaTypeConfigBuilder().SetType("Person"))
+          .AddType(
+              SchemaTypeConfigBuilder().SetType("Artist").AddParentType(
+                  "Person"))
+          .Build();
+
+  EXPECT_THAT(
+      SchemaUtil::Validate(schema, GetParam()),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+               HasSubstr("Property person from child type ArtistMessage is not "
+                         "compatible to the parent type Message.")));
+}
+
+TEST_P(SchemaUtilTest, ChildIncompatibleDataTypePropertyNotOk) {
+  // "ArtistMessage" inherits from "Message" but redeclares the parent's
+  // string property "text" as a double, which the test expects to be rejected
+  // as incompatible.
+  SchemaProto schema =
+      SchemaBuilder()
+          // Parent type.
+          .AddType(
+              SchemaTypeConfigBuilder()
+                  .SetType("Message")
+                  .AddProperty(PropertyConfigBuilder()
+                                   .SetName("text")
+                                   .SetCardinality(CARDINALITY_REPEATED)
+                                   .SetDataTypeString(TERM_MATCH_EXACT,
+                                                      TOKENIZER_PLAIN))
+                  .AddProperty(
+                      PropertyConfigBuilder()
+                          .SetName("person")
+                          .SetCardinality(CARDINALITY_OPTIONAL)
+                          .SetDataTypeDocument(
+                              "Person", /*index_nested_properties=*/true)))
+          // Child type with the offending "text" data type.
+          .AddType(
+              SchemaTypeConfigBuilder()
+                  .SetType("ArtistMessage")
+                  .AddParentType("Message")
+                  .AddProperty(PropertyConfigBuilder()
+                                   .SetName("text")
+                                   .SetCardinality(CARDINALITY_OPTIONAL)
+                                   // Parent says string; double is not ok.
+                                   .SetDataType(TYPE_DOUBLE))
+                  .AddProperty(PropertyConfigBuilder()
+                                   .SetName("extraText")
+                                   .SetCardinality(CARDINALITY_REPEATED)
+                                   .SetDataTypeString(TERM_MATCH_EXACT,
+                                                      TOKENIZER_PLAIN))
+                  .AddProperty(
+                      PropertyConfigBuilder()
+                          .SetName("person")
+                          .SetCardinality(CARDINALITY_REQUIRED)
+                          .SetDataTypeDocument(
+                              "Artist", /*index_nested_properties=*/true)))
+          // Nested document types: "Artist" is a subtype of "Person".
+          .AddType(SchemaTypeConfigBuilder().SetType("Person"))
+          .AddType(
+              SchemaTypeConfigBuilder().SetType("Artist").AddParentType(
+                  "Person"))
+          .Build();
+
+  EXPECT_THAT(
+      SchemaUtil::Validate(schema, GetParam()),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+               HasSubstr("Property text from child type ArtistMessage is not "
+                         "compatible to the parent type Message.")));
+}
+
+TEST_P(SchemaUtilTest, ChildIncompatibleDocumentTypePropertyNotOk) {
+  // "ArtistMessage" inherits from "Message" and redeclares the parent's
+  // "person" property (document type "Person") with document type "Artist".
+  // Unlike the compatible case, "Artist" here does NOT inherit from "Person",
+  // so the test expects the override to be rejected.
+  SchemaProto schema =
+      SchemaBuilder()
+          // Parent type.
+          .AddType(
+              SchemaTypeConfigBuilder()
+                  .SetType("Message")
+                  .AddProperty(PropertyConfigBuilder()
+                                   .SetName("text")
+                                   .SetCardinality(CARDINALITY_REPEATED)
+                                   .SetDataTypeString(TERM_MATCH_EXACT,
+                                                      TOKENIZER_PLAIN))
+                  .AddProperty(
+                      PropertyConfigBuilder()
+                          .SetName("person")
+                          .SetCardinality(CARDINALITY_OPTIONAL)
+                          .SetDataTypeDocument(
+                              "Person", /*index_nested_properties=*/true)))
+          // Child type with the offending "person" document type.
+          .AddType(
+              SchemaTypeConfigBuilder()
+                  .SetType("ArtistMessage")
+                  .AddParentType("Message")
+                  .AddProperty(PropertyConfigBuilder()
+                                   .SetName("text")
+                                   .SetCardinality(CARDINALITY_OPTIONAL)
+                                   .SetDataTypeString(TERM_MATCH_EXACT,
+                                                      TOKENIZER_PLAIN))
+                  .AddProperty(PropertyConfigBuilder()
+                                   .SetName("extraText")
+                                   .SetCardinality(CARDINALITY_REPEATED)
+                                   .SetDataTypeString(TERM_MATCH_EXACT,
+                                                      TOKENIZER_PLAIN))
+                  .AddProperty(
+                      PropertyConfigBuilder()
+                          .SetName("person")
+                          .SetCardinality(CARDINALITY_REQUIRED)
+                          // "Artist" is unrelated to "Person" in this schema.
+                          .SetDataTypeDocument(
+                              "Artist", /*index_nested_properties=*/true)))
+          // Two independent document types; no inheritance between them.
+          .AddType(SchemaTypeConfigBuilder().SetType("Person"))
+          .AddType(SchemaTypeConfigBuilder().SetType("Artist"))
+          .Build();
+
+  EXPECT_THAT(
+      SchemaUtil::Validate(schema, GetParam()),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+               HasSubstr("Property person from child type ArtistMessage is not "
+                         "compatible to the parent type Message.")));
+}
+
+TEST_P(SchemaUtilTest, ChildCompatibleMultipleParentPropertyOk) {
+  // Every property in this test has the same shape: an OPTIONAL,
+  // prefix-matched, plain-tokenized string. Only the name varies.
+  auto optional_prefix_string = [](const char* name) {
+    return PropertyConfigBuilder()
+        .SetName(name)
+        .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+        .SetCardinality(CARDINALITY_OPTIONAL);
+  };
+
+  // "EmailMessage" has two parents, "Email" and "Message", and redeclares
+  // every property of both.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("Email")
+                       .AddProperty(optional_prefix_string("sender"))
+                       .AddProperty(optional_prefix_string("recipient")))
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("Message")
+                       .AddProperty(optional_prefix_string("content")))
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("EmailMessage")
+                       .AddParentType("Email")
+                       .AddParentType("Message")
+                       .AddProperty(optional_prefix_string("sender"))
+                       .AddProperty(optional_prefix_string("recipient"))
+                       .AddProperty(optional_prefix_string("content")))
+          .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap dependents,
+                             SchemaUtil::Validate(schema, GetParam()));
+
+  // Each parent is depended on by "EmailMessage" only, with no associated
+  // property.
+  EXPECT_THAT(dependents, SizeIs(2));
+  EXPECT_THAT(dependents["Email"],
+              UnorderedElementsAre(Pair("EmailMessage", IsEmpty())));
+  EXPECT_THAT(dependents["Message"],
+              UnorderedElementsAre(Pair("EmailMessage", IsEmpty())));
+}
+
+TEST_P(SchemaUtilTest, ChildIncompatibleMultipleParentPropertyNotOk) {
+  // Every property in this test has the same shape: an OPTIONAL,
+  // prefix-matched, plain-tokenized string. Only the name varies.
+  auto optional_prefix_string = [](const char* name) {
+    return PropertyConfigBuilder()
+        .SetName(name)
+        .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+        .SetCardinality(CARDINALITY_OPTIONAL);
+  };
+
+  // The two parent types, shared by both scenarios below.
+  SchemaTypeConfigProto email_parent =
+      SchemaTypeConfigBuilder()
+          .SetType("Email")
+          .AddProperty(optional_prefix_string("sender"))
+          .AddProperty(optional_prefix_string("recipient"))
+          .Build();
+  SchemaTypeConfigProto message_parent =
+      SchemaTypeConfigBuilder()
+          .SetType("Message")
+          .AddProperty(optional_prefix_string("content"))
+          .Build();
+
+  // Scenario 1: the child omits "sender", which parent "Email" defines.
+  SchemaProto schema_without_sender =
+      SchemaBuilder()
+          .AddType(email_parent)
+          .AddType(message_parent)
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("EmailMessage")
+                       .AddParentType("Email")
+                       .AddParentType("Message")
+                       .AddProperty(optional_prefix_string("recipient"))
+                       .AddProperty(optional_prefix_string("content")))
+          .Build();
+  EXPECT_THAT(
+      SchemaUtil::Validate(schema_without_sender, GetParam()),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+               HasSubstr(
+                   "Property sender is not present in child type EmailMessage, "
+                   "but it is defined in the parent type Email.")));
+
+  // Scenario 2: the child omits "content", which parent "Message" defines.
+  SchemaProto schema_without_content =
+      SchemaBuilder()
+          .AddType(email_parent)
+          .AddType(message_parent)
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("EmailMessage")
+                       .AddParentType("Email")
+                       .AddParentType("Message")
+                       .AddProperty(optional_prefix_string("sender"))
+                       .AddProperty(optional_prefix_string("recipient")))
+          .Build();
+  EXPECT_THAT(
+      SchemaUtil::Validate(schema_without_content, GetParam()),
+      StatusIs(
+          libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+          HasSubstr(
+              "Property content is not present in child type EmailMessage, "
+              "but it is defined in the parent type Message.")));
+}
+
+// Runs every TEST_P(SchemaUtilTest, ...) case twice: first with the bool
+// parameter (passed as allow_circular_schema_definitions to
+// SchemaUtil::Validate via GetParam()) set to true, then set to false.
+INSTANTIATE_TEST_SUITE_P(SchemaUtilTest, SchemaUtilTest,
+                         testing::Values(
+                             /*allow_circular_schema_definitions=*/true,
+                             /*allow_circular_schema_definitions=*/false));
+
} // namespace
} // namespace lib
diff --git a/icing/schema/section.h b/icing/schema/section.h
index 65149b9..3685a29 100644
--- a/icing/schema/section.h
+++ b/icing/schema/section.h
@@ -33,6 +33,8 @@ inline constexpr int kSectionIdBits = 6;
inline constexpr SectionId kTotalNumSections = (1 << kSectionIdBits);
inline constexpr SectionId kInvalidSectionId = kTotalNumSections;
inline constexpr SectionId kMaxSectionId = kTotalNumSections - 1;
+// Prior versions of Icing only supported 16 indexed properties.
+inline constexpr SectionId kOldTotalNumSections = 16;
inline constexpr SectionId kMinSectionId = 0;
constexpr bool IsSectionIdValid(SectionId section_id) {
return section_id >= kMinSectionId && section_id <= kMaxSectionId;
diff --git a/icing/scoring/advanced_scoring/advanced-scorer_test.cc b/icing/scoring/advanced_scoring/advanced-scorer_test.cc
index c962bc5..65d4cff 100644
--- a/icing/scoring/advanced_scoring/advanced-scorer_test.cc
+++ b/icing/scoring/advanced_scoring/advanced-scorer_test.cc
@@ -109,7 +109,9 @@ class AdvancedScorerTest : public testing::Test {
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ test_email_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc
index 076e36a..ddc21a2 100644
--- a/icing/scoring/score-and-rank_benchmark.cc
+++ b/icing/scoring/score-and-rank_benchmark.cc
@@ -108,7 +108,8 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- ASSERT_TRUE(filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(
+ filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
@@ -123,7 +124,9 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) {
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
@@ -209,7 +212,8 @@ void BM_ScoreAndRankDocumentHitsByCreationTime(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- ASSERT_TRUE(filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(
+ filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
@@ -224,7 +228,9 @@ void BM_ScoreAndRankDocumentHitsByCreationTime(benchmark::State& state) {
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
@@ -313,7 +319,8 @@ void BM_ScoreAndRankDocumentHitsNoScoring(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- ASSERT_TRUE(filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(
+ filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
@@ -328,7 +335,9 @@ void BM_ScoreAndRankDocumentHitsNoScoring(benchmark::State& state) {
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
@@ -411,7 +420,8 @@ void BM_ScoreAndRankDocumentHitsByRelevanceScoring(benchmark::State& state) {
// Creates file directories
Filesystem filesystem;
filesystem.DeleteDirectoryRecursively(base_dir.c_str());
- ASSERT_TRUE(filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
+ ASSERT_TRUE(
+ filesystem.CreateDirectoryRecursively(document_store_dir.c_str()));
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()));
Clock clock;
@@ -426,7 +436,9 @@ void BM_ScoreAndRankDocumentHitsByRelevanceScoring(benchmark::State& state) {
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc
index 2649c95..1c7d2ab 100644
--- a/icing/scoring/scorer_test.cc
+++ b/icing/scoring/scorer_test.cc
@@ -83,7 +83,9 @@ class ScorerTest : public ::testing::TestWithParam<ScorerTestingMode> {
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ test_email_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc
index 5c42236..10f3eb5 100644
--- a/icing/scoring/scoring-processor_test.cc
+++ b/icing/scoring/scoring-processor_test.cc
@@ -93,7 +93,9 @@ class ScoringProcessorTest
.SetDataType(TYPE_STRING)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ test_email_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
diff --git a/icing/scoring/section-weights_test.cc b/icing/scoring/section-weights_test.cc
index 02205f5..28b1797 100644
--- a/icing/scoring/section-weights_test.cc
+++ b/icing/scoring/section-weights_test.cc
@@ -87,7 +87,9 @@ class SectionWeightsTest : public testing::Test {
SchemaProto schema =
SchemaBuilder().AddType(sender_schema).AddType(email_schema).Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index ae8bfc0..b49d0de 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -285,6 +285,45 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
return create_result;
}
+// Deletes the derived files of the document store rooted at `base_dir`: the
+// header file, the document key mapper, the document id mapper, the document
+// associated score cache, the filter cache, the namespace mapper, the corpus
+// mapper and the corpus associated score cache.
+//
+// The steps run in the order listed above and the function returns as soon as
+// one of them fails, so a failed call may leave some derived files in place.
+//
+// Params:
+//   filesystem: used for every deletion; dereferenced unconditionally, so it
+//               must not be null.
+//   base_dir:   directory from which all derived file names are built.
+//
+// Returns:
+//   OK if every deletion step succeeded
+//   INTERNAL_ERROR if the header file couldn't be deleted
+//   The error reported by the first failing Delete() helper otherwise
+/* static */ libtextclassifier3::Status DocumentStore::DiscardDerivedFiles(
+    const Filesystem* filesystem, const std::string& base_dir) {
+  // Header. Build the file name once and reuse it for the deletion.
+  const std::string header_filename = MakeHeaderFilename(base_dir);
+  if (!filesystem->DeleteFile(header_filename.c_str())) {
+    return absl_ports::InternalError("Couldn't delete header file");
+  }
+
+  // Document key mapper. Unlike the other components it is addressed by
+  // base_dir itself rather than by a Make*Filename() helper.
+  ICING_RETURN_IF_ERROR(
+      DynamicTrieKeyMapper<DocumentId>::Delete(*filesystem, base_dir));
+
+  // Document id mapper
+  ICING_RETURN_IF_ERROR(FileBackedVector<int64_t>::Delete(
+      *filesystem, MakeDocumentIdMapperFilename(base_dir)));
+
+  // Document associated score cache
+  ICING_RETURN_IF_ERROR(FileBackedVector<DocumentAssociatedScoreData>::Delete(
+      *filesystem, MakeScoreCacheFilename(base_dir)));
+
+  // Filter cache
+  ICING_RETURN_IF_ERROR(FileBackedVector<DocumentFilterData>::Delete(
+      *filesystem, MakeFilterCacheFilename(base_dir)));
+
+  // Namespace mapper
+  ICING_RETURN_IF_ERROR(DynamicTrieKeyMapper<NamespaceId>::Delete(
+      *filesystem, MakeNamespaceMapperFilename(base_dir)));
+
+  // Corpus mapper
+  ICING_RETURN_IF_ERROR(DynamicTrieKeyMapper<CorpusId>::Delete(
+      *filesystem, MakeCorpusMapperFilename(base_dir)));
+
+  // Corpus associated score cache
+  ICING_RETURN_IF_ERROR(FileBackedVector<CorpusAssociatedScoreData>::Delete(
+      *filesystem, MakeCorpusScoreCache(base_dir)));
+
+  return libtextclassifier3::Status::OK;
+}
+
libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats) {
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 88050ce..3bb04f4 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -146,6 +146,14 @@ class DocumentStore {
int32_t compression_level,
InitializeStatsProto* initialize_stats);
+ // Discards all derived data in the document store, i.e. the header file,
+ // the document key mapper, the document id mapper, the document associated
+ // score cache, the filter cache, the namespace mapper, the corpus mapper and
+ // the corpus associated score cache.
+ //
+ // This is a static function: it operates purely on the files found under
+ // base_dir and does not need a DocumentStore instance.
+ //
+ // Params:
+ // filesystem: filesystem used to delete the files; must not be null
+ // base_dir: base directory of the document store whose derived files
+ // should be discarded
+ //
+ // Returns:
+ // OK on success or nothing to discard
+ // INTERNAL_ERROR on any I/O errors
+ static libtextclassifier3::Status DiscardDerivedFiles(
+ const Filesystem* filesystem, const std::string& base_dir);
+
// Returns the maximum DocumentId that the DocumentStore has assigned. If
// there has not been any DocumentIds assigned, i.e. the DocumentStore is
// empty, then kInvalidDocumentId is returned. This does not filter out
diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc
index 99e17c7..61906a9 100644
--- a/icing/store/document-store_benchmark.cc
+++ b/icing/store/document-store_benchmark.cc
@@ -116,7 +116,9 @@ std::unique_ptr<SchemaStore> CreateSchemaStore(Filesystem filesystem,
std::unique_ptr<SchemaStore> schema_store =
SchemaStore::Create(&filesystem, schema_store_dir, clock).ValueOrDie();
- auto set_schema_status = schema_store->SetSchema(CreateSchema());
+ auto set_schema_status = schema_store->SetSchema(
+ CreateSchema(), /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false);
if (!set_schema_status.ok()) {
ICING_LOG(ERROR) << set_schema_status.status().error_message();
}
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index 896d852..146191f 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -189,7 +189,10 @@ class DocumentStoreTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -198,6 +201,8 @@ class DocumentStoreTest : public ::testing::Test {
}
void TearDown() override {
+ // Release the segmenter and the schema store before the test directory is
+ // removed below.
+ // NOTE(review): presumably this ensures nothing still holds files under
+ // test_dir_ open while the directory is being deleted - confirm.
+ lang_segmenter_.reset();
+ schema_store_.reset();
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
@@ -717,7 +722,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -844,7 +851,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentId email_document_id;
DocumentId message_document_id;
@@ -935,7 +944,9 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
DocumentId email_document_id;
DocumentId message_document_id;
@@ -989,7 +1000,8 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
.AddType(SchemaTypeConfigBuilder().SetType("message"))
.Build();
ICING_EXPECT_OK(schema_store->SetSchema(
- new_schema, /*ignore_errors_and_delete_documents=*/true));
+ new_schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1264,11 +1276,16 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
IsOkAndHolds(EqualsProto(test_document2_)));
+
+ EXPECT_THAT(doc_store->ReportUsage(CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/2",
+ /*timestamp_ms=*/0, UsageReport::USAGE_TYPE1)),
+ IsOk());
}
- // "Corrupt" one of the derived files by adding non-checksummed data to
- // it. This will mess up the checksum and throw an error on the derived file's
- // initialization.
+ // "Corrupt" one of the derived files by modifying existing data without
+ // calling PersistToDisk() or updating its checksum. This will invalidate the
+ // checksum and cause an error during the derived file's initialization.
const std::string document_id_mapper_file =
absl_ports::StrCat(document_store_dir_, "/document_id_mapper");
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1276,13 +1293,14 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
FileBackedVector<int64_t>::Create(
filesystem_, document_id_mapper_file,
MemoryMappedFile::READ_WRITE_AUTO_SYNC));
- int64_t corrupt_document_id = 3;
- int64_t corrupt_offset = 3;
+ int64_t corrupt_document_id = 1;
+ int64_t corrupt_offset = 123456;
EXPECT_THAT(document_id_mapper->Set(corrupt_document_id, corrupt_offset),
IsOk());
+ // Initializing the document id mapper file will now return an error, which
+ // triggers RegenerateDerivedFiles.
// Successfully recover from a corrupt derived file issue.
- // NOTE: this doesn't trigger RegenerateDerivedFiles.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1304,8 +1322,100 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
/*namespace_id=*/0,
/*schema_type_id=*/0, document2_expiration_timestamp_)));
- // Checks derived score cache - note that they aren't regenerated from
+ // Checks derived score cache
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/1, /*sum_length_in_tokens=*/4)));
+
+ // Checks usage score data - note that they aren't regenerated from
// scratch.
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_count = 1;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(UsageStore::UsageScores actual_scores,
+ doc_store->GetUsageScores(document_id2));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
+}
+
+TEST_F(DocumentStoreTest, ShouldRecoverFromDiscardDerivedFiles) {
+ DocumentId document_id1, document_id2;
+ {
+ // Can put and delete fine.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id1,
+ doc_store->Put(DocumentProto(test_document1_), /*num_tokens=*/4));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id2,
+ doc_store->Put(DocumentProto(test_document2_), /*num_tokens=*/4));
+ EXPECT_THAT(doc_store->Get(document_id1),
+ IsOkAndHolds(EqualsProto(test_document1_)));
+ EXPECT_THAT(doc_store->Get(document_id2),
+ IsOkAndHolds(EqualsProto(test_document2_)));
+ // Checks derived score cache
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id1),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document1_score_, document1_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(
+ doc_store->GetDocumentAssociatedScoreData(document_id2),
+ IsOkAndHolds(DocumentAssociatedScoreData(
+ /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+ /*length_in_tokens=*/4)));
+ EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+ IsOkAndHolds(CorpusAssociatedScoreData(
+ /*num_docs=*/2, /*sum_length_in_tokens=*/8)));
+ // Delete document 1
+ EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
+ EXPECT_THAT(doc_store->Get(document_id1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(doc_store->Get(document_id2),
+ IsOkAndHolds(EqualsProto(test_document2_)));
+
+ EXPECT_THAT(doc_store->ReportUsage(CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/2",
+ /*timestamp_ms=*/0, UsageReport::USAGE_TYPE1)),
+ IsOk());
+ }
+
+ // Discard all derived files.
+ ICING_ASSERT_OK(
+ DocumentStore::DiscardDerivedFiles(&filesystem_, document_store_dir_));
+
+ // Successfully recover after discarding all derived files.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ EXPECT_THAT(doc_store->Get(document_id1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(doc_store->Get(document_id2),
+ IsOkAndHolds(EqualsProto(test_document2_)));
+
+ // Checks derived filter cache
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(
+ DocumentFilterData doc_filter_data,
+ doc_store->GetAliveDocumentFilterData(document_id2));
+ EXPECT_THAT(doc_filter_data,
+ Eq(DocumentFilterData(
+ /*namespace_id=*/0,
+ /*schema_type_id=*/0, document2_expiration_timestamp_)));
+
+ // Checks derived score cache.
EXPECT_THAT(
doc_store->GetDocumentAssociatedScoreData(document_id2),
IsOkAndHolds(DocumentAssociatedScoreData(
@@ -1313,7 +1423,15 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
/*length_in_tokens=*/4)));
EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
IsOkAndHolds(CorpusAssociatedScoreData(
- /*num_docs=*/2, /*sum_length_in_tokens=*/8)));
+ /*num_docs=*/1, /*sum_length_in_tokens=*/4)));
+
+ // Checks usage score data - note that they aren't regenerated from
+ // scratch.
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_count = 1;
+ ICING_ASSERT_HAS_VALUE_AND_ASSIGN(UsageStore::UsageScores actual_scores,
+ doc_store->GetUsageScores(document_id2));
+ EXPECT_THAT(actual_scores, Eq(expected_scores));
}
TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
@@ -2177,7 +2295,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.AddType(SchemaTypeConfigBuilder().SetType("message"))
.Build();
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -2232,7 +2352,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
SchemaProto schema = SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -2286,7 +2408,9 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -2334,7 +2458,9 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId new_email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -2377,7 +2503,9 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Add two documents, with and without a subject
DocumentProto email_without_subject = DocumentBuilder()
@@ -2419,7 +2547,8 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
PropertyConfigProto::Cardinality::REQUIRED);
ICING_EXPECT_OK(schema_store->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true));
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get()));
@@ -2448,7 +2577,9 @@ TEST_F(DocumentStoreTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Add a "email" and "message" document
DocumentProto email_document = DocumentBuilder()
@@ -2490,7 +2621,8 @@ TEST_F(DocumentStoreTest,
ICING_EXPECT_OK(
schema_store->SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/true));
+ /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get()));
@@ -2518,7 +2650,9 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -2566,8 +2700,11 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
.AddType(SchemaTypeConfigBuilder().SetType("email"))
.Build();
- ICING_ASSERT_OK_AND_ASSIGN(SchemaStore::SetSchemaResult set_schema_result,
- schema_store->SetSchema(schema));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaStore::SetSchemaResult set_schema_result,
+ schema_store->SetSchema(schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId new_email_schema_type_id,
schema_store->GetSchemaTypeId("email"));
@@ -2611,7 +2748,9 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Add two documents, with and without a subject
DocumentProto email_without_subject = DocumentBuilder()
@@ -2655,7 +2794,8 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
ICING_ASSERT_OK_AND_ASSIGN(
SchemaStore::SetSchemaResult set_schema_result,
schema_store->SetSchema(schema,
- /*ignore_errors_and_delete_documents=*/true));
+ /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_EXPECT_OK(document_store->OptimizedUpdateSchemaStore(
schema_store.get(), set_schema_result));
@@ -2685,7 +2825,9 @@ TEST_F(DocumentStoreTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_EXPECT_OK(schema_store->SetSchema(schema));
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
// Add a "email" and "message" document
DocumentProto email_document = DocumentBuilder()
@@ -2728,7 +2870,8 @@ TEST_F(DocumentStoreTest,
ICING_ASSERT_OK_AND_ASSIGN(
SchemaStore::SetSchemaResult set_schema_result,
schema_store->SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/true));
+ /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false));
ICING_EXPECT_OK(document_store->OptimizedUpdateSchemaStore(
schema_store.get(), set_schema_result));
@@ -3475,7 +3618,10 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// The typeid for "email" should be 0.
ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
@@ -3524,7 +3670,10 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.AddType(email_type_config)
.Build();
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// Adding a new type should cause ids to be reassigned. Ids are assigned in
// order of appearance so 'alarm' should be 0 and 'email' should be 1.
ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0));
@@ -3578,7 +3727,10 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// The typeid for "email" should be 0.
ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
@@ -3627,7 +3779,10 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.AddType(email_type_config)
.Build();
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// Adding a new type should cause ids to be reassigned. Ids are assigned in
// order of appearance so 'alarm' should be 0 and 'email' should be 1.
ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0));
@@ -3673,7 +3828,10 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
DocumentProto docWithBody =
DocumentBuilder()
@@ -3733,7 +3891,8 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
.Build();
schema = SchemaBuilder().AddType(email_type_config).Build();
ASSERT_THAT(schema_store->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true),
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
IsOk());
{
@@ -3782,7 +3941,10 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
DocumentProto docWithBody =
DocumentBuilder()
@@ -3842,7 +4004,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
.Build();
schema = SchemaBuilder().AddType(email_type_config).Build();
ASSERT_THAT(schema_store->SetSchema(
- schema, /*ignore_errors_and_delete_documents=*/true),
+ schema, /*ignore_errors_and_delete_documents=*/true,
+ /*allow_circular_schema_definitions=*/false),
IsOk());
{
@@ -3889,7 +4052,10 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
// Create dst directory that we'll initialize the DocumentStore over.
std::string document_store_dir = document_store_dir_ + "_migrate";
@@ -4015,7 +4181,9 @@ TEST_F(DocumentStoreTest, GetDebugInfo) {
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
diff --git a/icing/testing/numeric/normal-distribution-number-generator.h b/icing/testing/numeric/normal-distribution-number-generator.h
new file mode 100644
index 0000000..73cdd1f
--- /dev/null
+++ b/icing/testing/numeric/normal-distribution-number-generator.h
@@ -0,0 +1,42 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_NUMERIC_NORMAL_DISTRIBUTION_NUMBER_GENERATOR_H_
+#define ICING_TESTING_NUMERIC_NORMAL_DISTRIBUTION_NUMBER_GENERATOR_H_
+
+#include <cmath>
+#include <random>
+
+#include "icing/testing/numeric/number-generator.h"
+
+namespace icing {
+namespace lib {
+
+// A NumberGenerator that draws its values from a normal (Gaussian)
+// distribution described by a mean and a standard deviation. Every sample is
+// rounded with std::round before being converted to T.
+template <typename T>
+class NormalDistributionNumberGenerator : public NumberGenerator<T> {
+ public:
+  // seed is forwarded to the NumberGenerator<T> base class; mean and stddev
+  // configure the underlying std::normal_distribution.
+  explicit NormalDistributionNumberGenerator(int seed, double mean,
+                                             double stddev)
+      : NumberGenerator<T>(seed), distribution_(mean, stddev) {}
+
+  // Draws one sample using the engine_ member inherited from the base class
+  // and returns it rounded to the nearest integral value (std::round rounds
+  // halfway cases away from zero), implicitly converted to T.
+  T Generate() override {
+    const double sample = distribution_(this->engine_);
+    return std::round(sample);
+  }
+
+ private:
+  // Produces double-valued samples (the default result type of
+  // std::normal_distribution).
+  std::normal_distribution<double> distribution_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_NUMERIC_NORMAL_DISTRIBUTION_NUMBER_GENERATOR_H_
diff --git a/icing/testing/numeric/uniform-distribution-integer-generator.h b/icing/testing/numeric/uniform-distribution-integer-generator.h
index 00d8459..569eebd 100644
--- a/icing/testing/numeric/uniform-distribution-integer-generator.h
+++ b/icing/testing/numeric/uniform-distribution-integer-generator.h
@@ -15,6 +15,8 @@
#ifndef ICING_TESTING_NUMERIC_UNIFORM_DISTRIBUTION_INTEGER_GENERATOR_H_
#define ICING_TESTING_NUMERIC_UNIFORM_DISTRIBUTION_INTEGER_GENERATOR_H_
+#include <random>
+
#include "icing/testing/numeric/number-generator.h"
namespace icing {
diff --git a/icing/tokenization/icu/icu-language-segmenter.cc b/icing/tokenization/icu/icu-language-segmenter.cc
index 59bcc18..cac12f7 100644
--- a/icing/tokenization/icu/icu-language-segmenter.cc
+++ b/icing/tokenization/icu/icu-language-segmenter.cc
@@ -375,8 +375,7 @@ void IcuLanguageSegmenter::ReturnBreakIterator(UBreakIterator* itr) const {
}
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
-IcuLanguageSegmenter::Segment(const std::string_view text,
- LanguageSegmenter::AccessType) const {
+IcuLanguageSegmenter::Segment(const std::string_view text) const {
return IcuLanguageSegmenterIterator::Create(this, ProduceBreakIterator(),
text, locale_);
}
@@ -385,7 +384,7 @@ libtextclassifier3::StatusOr<std::vector<std::string_view>>
IcuLanguageSegmenter::GetAllTerms(const std::string_view text) const {
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<LanguageSegmenter::Iterator> iterator,
- Segment(text, LanguageSegmenter::AccessType::kForwardIterator));
+ Segment(text));
std::vector<std::string_view> terms;
while (iterator->Advance()) {
terms.push_back(iterator->GetTerm());
diff --git a/icing/tokenization/icu/icu-language-segmenter.h b/icing/tokenization/icu/icu-language-segmenter.h
index 1ca70c5..44de5a2 100644
--- a/icing/tokenization/icu/icu-language-segmenter.h
+++ b/icing/tokenization/icu/icu-language-segmenter.h
@@ -64,7 +64,7 @@ class IcuLanguageSegmenter : public LanguageSegmenter {
// An iterator of terms on success
// INTERNAL_ERROR if any error occurs
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
- Segment(std::string_view text, LanguageSegmenter::AccessType) const override;
+ Segment(std::string_view text) const override;
// The segmentation depends on the language detected in the input text.
//
diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc
index d1bf5c6..3bacbc6 100644
--- a/icing/tokenization/icu/icu-language-segmenter_test.cc
+++ b/icing/tokenization/icu/icu-language-segmenter_test.cc
@@ -419,10 +419,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ContinuousWhitespaces) {
// iterator is done.
text_with_spaces = absl_ports::StrCat(std::string(kNumSeparators, ' '),
"Hello", " ", "World");
- ICING_ASSERT_OK_AND_ASSIGN(
- auto itr,
- language_segmenter->Segment(
- text_with_spaces, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto itr,
+ language_segmenter->Segment(text_with_spaces));
std::vector<std::string_view> terms;
while (itr->Advance()) {
terms.push_back(itr->GetTerm());
@@ -518,10 +516,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToStartUtf32WordConnector) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "com.google.android is package";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "com.google.android is package"
// ^ ^^ ^^
@@ -537,10 +533,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, NewIteratorResetToStartUtf32) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -556,10 +550,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -576,10 +568,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -598,10 +588,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, IteratorDoneResetToStartUtf32) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -619,10 +607,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterUtf32WordConnector) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "package com.google.android name";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "package com.google.android name"
// ^ ^^ ^^
@@ -644,10 +630,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterUtf32OutOfBounds) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -677,15 +661,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kText));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kText));
std::vector<std::string_view> reset_terms =
GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
@@ -701,15 +683,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kThai,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kThai));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(kThai,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kThai));
std::vector<std::string_view> reset_terms =
GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
@@ -725,15 +705,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kKorean = "나는 매일 출근합니다.";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kKorean,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kKorean));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(kKorean,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kKorean));
std::vector<std::string_view> reset_terms =
GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
@@ -753,15 +731,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kText));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kText));
std::vector<std::string_view> advance_and_reset_terms =
GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
@@ -778,15 +754,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kThai,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kThai));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
- segmenter->Segment(kThai,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kThai));
std::vector<std::string_view> advance_and_reset_terms =
GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
@@ -803,15 +777,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kKorean = "나는 매일 출근합니다.";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kKorean,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kKorean));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
- segmenter->Segment(kKorean,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kKorean));
std::vector<std::string_view> advance_and_reset_terms =
GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
@@ -828,9 +800,7 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- "How are you你好吗お元気ですか",
- LanguageSegmenter::AccessType::kForwardIterator));
+ language_segmenter->Segment("How are you你好吗お元気ですか"));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -867,10 +837,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Multiple continuous whitespaces are treated as one.
constexpr std::string_view kTextWithSpace = "Hello World";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kTextWithSpace, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kTextWithSpace));
// String: "Hello World"
// ^ ^ ^
@@ -909,10 +877,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermAfterUtf32) {
// CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that
// don't have whitespaces as word delimiter. Chinese
constexpr std::string_view kChinese = "我每天走路去上班。";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kChinese, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kChinese));
// String: "我每天走路去上班。"
// ^ ^ ^ ^^ ^
// UTF-8 idx: 0 3 9 15 18 24
@@ -938,10 +904,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermAfterUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Japanese
constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kJapanese, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kJapanese));
// String: "私は毎日仕事に歩いています。"
// ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
// UTF-8 idx: 0 3 6 12 18212427 33 39
@@ -966,10 +930,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermAfterUtf32) {
language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kKhmer, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kKhmer));
// String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
// ^ ^ ^ ^ ^
// UTF-8 idx: 0 9 24 45 69
@@ -995,10 +957,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermAfterUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Thai
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kThai, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kThai));
// String: "ฉันเดินไปทำงานทุกวัน"
// ^ ^ ^ ^ ^ ^
// UTF-8 idx: 0 9 21 27 42 51
@@ -1023,10 +983,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "package name com.google.android!";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "package name com.google.android!"
// ^ ^^ ^^ ^
@@ -1048,10 +1006,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermBeforeOutOfBoundsUtf32) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -1081,15 +1037,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kText));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kText));
std::vector<std::string_view> reset_terms =
GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
@@ -1107,15 +1061,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kThai,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kThai));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(kThai,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kThai));
std::vector<std::string_view> reset_terms =
GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
@@ -1132,15 +1084,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
constexpr std::string_view kKorean = "나는 매일 출근합니다.";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kKorean,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kKorean));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(kKorean,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kKorean));
std::vector<std::string_view> reset_terms =
GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
@@ -1157,9 +1107,7 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- "How are you你好吗お元気ですか",
- LanguageSegmenter::AccessType::kForwardIterator));
+ language_segmenter->Segment("How are you你好吗お元気ですか"));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -1197,10 +1145,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest,
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Multiple continuous whitespaces are treated as one.
constexpr std::string_view kTextWithSpace = "Hello World";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kTextWithSpace, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kTextWithSpace));
// String: "Hello World"
// ^ ^ ^
@@ -1238,10 +1184,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermBeforeUtf32) {
// CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that
// don't have whitespaces as word delimiter. Chinese
constexpr std::string_view kChinese = "我每天走路去上班。";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kChinese, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kChinese));
// String: "我每天走路去上班。"
// ^ ^ ^ ^^
// UTF-8 idx: 0 3 9 15 18
@@ -1264,10 +1208,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermBeforeUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Japanese
constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kJapanese, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kJapanese));
// String: "私は毎日仕事に歩いています。"
// ^ ^ ^ ^ ^ ^ ^ ^ ^
// UTF-8 idx: 0 3 6 12 18212427 33
@@ -1289,10 +1231,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermBeforeUtf32) {
language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kKhmer, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kKhmer));
// String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
// ^ ^ ^ ^
// UTF-8 idx: 0 9 24 45
@@ -1315,10 +1255,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermBeforeUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Thai
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kThai, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kThai));
// String: "ฉันเดินไปทำงานทุกวัน"
// ^ ^ ^ ^ ^ ^
// UTF-8 idx: 0 9 21 27 42 51
@@ -1360,13 +1298,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, MultipleLangSegmentersTest) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> iterator_one,
- language_segmenter->Segment(
- "foo bar baz", LanguageSegmenter::AccessType::kForwardIterator));
+ language_segmenter->Segment("foo bar baz"));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> iterator_two,
- language_segmenter->Segment(
- "abra kadabra alakazam",
- LanguageSegmenter::AccessType::kForwardIterator));
+ language_segmenter->Segment("abra kadabra alakazam"));
ASSERT_TRUE(iterator_one->Advance());
ASSERT_TRUE(iterator_two->Advance());
diff --git a/icing/tokenization/language-segmenter-iterator_test.cc b/icing/tokenization/language-segmenter-iterator_test.cc
index b14ce19..3aff45c 100644
--- a/icing/tokenization/language-segmenter-iterator_test.cc
+++ b/icing/tokenization/language-segmenter-iterator_test.cc
@@ -54,10 +54,8 @@ TEST_F(LanguageSegmenterIteratorTest, AdvanceAndGetTerm) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- auto iterator,
- language_segmenter->Segment(
- "foo bar", LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto iterator,
+ language_segmenter->Segment("foo bar"));
EXPECT_TRUE(iterator->Advance());
EXPECT_THAT(iterator->GetTerm(), Eq("foo"));
@@ -78,10 +76,8 @@ TEST_F(LanguageSegmenterIteratorTest,
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- auto iterator,
- language_segmenter->Segment(
- "foo bar", LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto iterator,
+ language_segmenter->Segment("foo bar"));
EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/0),
IsOkAndHolds(3)); // The term " "
@@ -98,10 +94,8 @@ TEST_F(LanguageSegmenterIteratorTest,
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- auto iterator,
- language_segmenter->Segment(
- "foo bar", LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto iterator,
+ language_segmenter->Segment("foo bar"));
EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/-1), IsOk());
@@ -119,10 +113,7 @@ TEST_F(LanguageSegmenterIteratorTest,
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- auto iterator,
- language_segmenter->Segment(
- text, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment(text));
EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/text.length()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
@@ -136,10 +127,7 @@ TEST_F(LanguageSegmenterIteratorTest,
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- auto iterator,
- language_segmenter->Segment(
- text, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment(text));
EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/100),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
@@ -152,10 +140,8 @@ TEST_F(LanguageSegmenterIteratorTest,
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- auto iterator,
- language_segmenter->Segment(
- "foo bar", LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto iterator,
+ language_segmenter->Segment("foo bar"));
EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/6),
IsOkAndHolds(3)); // The term " "
@@ -172,10 +158,8 @@ TEST_F(LanguageSegmenterIteratorTest,
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- auto iterator,
- language_segmenter->Segment(
- "foo bar", LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto iterator,
+ language_segmenter->Segment("foo bar"));
// Zero is a valid argument, but there aren't any terms that end before it.
EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/0),
@@ -189,10 +173,8 @@ TEST_F(LanguageSegmenterIteratorTest,
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- auto iterator,
- language_segmenter->Segment(
- "foo bar", LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto iterator,
+ language_segmenter->Segment("foo bar"));
EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/-1),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
@@ -209,10 +191,7 @@ TEST_F(LanguageSegmenterIteratorTest,
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- auto iterator,
- language_segmenter->Segment(
- text, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment(text));
EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/text.length()),
IsOk());
diff --git a/icing/tokenization/language-segmenter.h b/icing/tokenization/language-segmenter.h
index 83a47d4..913386a 100644
--- a/icing/tokenization/language-segmenter.h
+++ b/icing/tokenization/language-segmenter.h
@@ -38,11 +38,6 @@ namespace lib {
// segmenter->GetAllTerms(text));
class LanguageSegmenter {
public:
- enum class AccessType {
- kForwardIterator,
- kBidirectionalIterator,
- };
-
virtual ~LanguageSegmenter() = default;
// An iterator helping to find terms in the input text.
@@ -170,7 +165,7 @@ class LanguageSegmenter {
// outlives the returned iterator.
virtual libtextclassifier3::StatusOr<
std::unique_ptr<LanguageSegmenter::Iterator>>
- Segment(std::string_view text, AccessType access_type) const = 0;
+ Segment(std::string_view text) const = 0;
// Segments and returns all terms in the input text.
//
diff --git a/icing/tokenization/language-segmenter_benchmark.cc b/icing/tokenization/language-segmenter_benchmark.cc
index 50c625e..748a322 100644
--- a/icing/tokenization/language-segmenter_benchmark.cc
+++ b/icing/tokenization/language-segmenter_benchmark.cc
@@ -68,10 +68,7 @@ void BM_SegmentNoSpace(benchmark::State& state) {
for (auto _ : state) {
std::unique_ptr<LanguageSegmenter::Iterator> iterator =
- language_segmenter
- ->Segment(input_string,
- LanguageSegmenter::AccessType::kForwardIterator)
- .ValueOrDie();
+ language_segmenter->Segment(input_string).ValueOrDie();
while (iterator->Advance()) {
iterator->GetTerm();
}
@@ -111,10 +108,7 @@ void BM_SegmentWithSpaces(benchmark::State& state) {
for (auto _ : state) {
std::unique_ptr<LanguageSegmenter::Iterator> iterator =
- language_segmenter
- ->Segment(input_string,
- LanguageSegmenter::AccessType::kForwardIterator)
- .ValueOrDie();
+ language_segmenter->Segment(input_string).ValueOrDie();
while (iterator->Advance()) {
iterator->GetTerm();
}
@@ -154,10 +148,7 @@ void BM_SegmentCJK(benchmark::State& state) {
for (auto _ : state) {
std::unique_ptr<LanguageSegmenter::Iterator> iterator =
- language_segmenter
- ->Segment(input_string,
- LanguageSegmenter::AccessType::kForwardIterator)
- .ValueOrDie();
+ language_segmenter->Segment(input_string).ValueOrDie();
while (iterator->Advance()) {
iterator->GetTerm();
}
diff --git a/icing/tokenization/plain-tokenizer.cc b/icing/tokenization/plain-tokenizer.cc
index 9175f3a..d40022b 100644
--- a/icing/tokenization/plain-tokenizer.cc
+++ b/icing/tokenization/plain-tokenizer.cc
@@ -130,19 +130,17 @@ class PlainTokenIterator : public Tokenizer::Iterator {
};
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>>
-PlainTokenizer::Tokenize(std::string_view text,
- LanguageSegmenter::AccessType access_type) const {
+PlainTokenizer::Tokenize(std::string_view text) const {
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<LanguageSegmenter::Iterator> base_iterator,
- language_segmenter_.Segment(text, access_type));
+ language_segmenter_.Segment(text));
return std::make_unique<PlainTokenIterator>(std::move(base_iterator));
}
libtextclassifier3::StatusOr<std::vector<Token>> PlainTokenizer::TokenizeAll(
std::string_view text) const {
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<Tokenizer::Iterator> iterator,
- Tokenize(text, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator,
+ Tokenize(text));
std::vector<Token> tokens;
while (iterator->Advance()) {
std::vector<Token> batch_tokens = iterator->GetTokens();
diff --git a/icing/tokenization/plain-tokenizer.h b/icing/tokenization/plain-tokenizer.h
index 61a8b5a..25b40fd 100644
--- a/icing/tokenization/plain-tokenizer.h
+++ b/icing/tokenization/plain-tokenizer.h
@@ -33,8 +33,7 @@ class PlainTokenizer : public Tokenizer {
: language_segmenter_(*language_segmenter) {}
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize(
- std::string_view text,
- LanguageSegmenter::AccessType access_type) const override;
+ std::string_view text) const override;
libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll(
std::string_view text) const override;
diff --git a/icing/tokenization/plain-tokenizer_test.cc b/icing/tokenization/plain-tokenizer_test.cc
index f94a558..6c426da 100644
--- a/icing/tokenization/plain-tokenizer_test.cc
+++ b/icing/tokenization/plain-tokenizer_test.cc
@@ -25,7 +25,6 @@
#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
-#include "icing/tokenization/language-segmenter.h"
#include "icing/tokenization/tokenizer-factory.h"
#include "unicode/uloc.h"
@@ -68,10 +67,8 @@ TEST_F(PlainTokenizerTest, NoTokensBeforeAdvancing) {
language_segmenter.get()));
constexpr std::string_view kText = "Hello, world!";
- ICING_ASSERT_OK_AND_ASSIGN(
- auto token_iterator,
- plain_tokenizer->Tokenize(
- kText, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto token_iterator,
+ plain_tokenizer->Tokenize(kText));
// We should get no tokens if we get the token before advancing.
EXPECT_THAT(token_iterator->GetTokens(), IsEmpty());
@@ -89,10 +86,8 @@ TEST_F(PlainTokenizerTest, LastTokenAfterFullyAdvanced) {
language_segmenter.get()));
constexpr std::string_view kText = "Hello, world!";
- ICING_ASSERT_OK_AND_ASSIGN(
- auto token_iterator,
- plain_tokenizer->Tokenize(
- kText, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto token_iterator,
+ plain_tokenizer->Tokenize(kText));
while (token_iterator->Advance()) {}
@@ -349,10 +344,7 @@ TEST_F(PlainTokenizerTest, ResetToTokenStartingAfterSimple) {
language_segmenter.get()));
constexpr std::string_view kText = "f b";
- auto iterator =
- plain_tokenizer
- ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator)
- .ValueOrDie();
+ auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
EXPECT_TRUE(iterator->ResetToTokenStartingAfter(0));
EXPECT_THAT(iterator->GetTokens(),
@@ -373,10 +365,7 @@ TEST_F(PlainTokenizerTest, ResetToTokenEndingBeforeSimple) {
language_segmenter.get()));
constexpr std::string_view kText = "f b";
- auto iterator =
- plain_tokenizer
- ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator)
- .ValueOrDie();
+ auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
EXPECT_TRUE(iterator->ResetToTokenEndingBefore(2));
EXPECT_THAT(iterator->GetTokens(),
@@ -423,10 +412,7 @@ TEST_F(PlainTokenizerTest, ResetToTokenStartingAfter) {
"bat", // 16: " bat"
};
- auto iterator =
- plain_tokenizer
- ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator)
- .ValueOrDie();
+ auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
EXPECT_TRUE(iterator->Advance());
EXPECT_THAT(iterator->GetTokens(),
ElementsAre(EqualsToken(Token::Type::REGULAR, "foo")));
@@ -480,10 +466,7 @@ TEST_F(PlainTokenizerTest, ResetToTokenEndingBefore) {
"foo", // 4: "foo "
};
- auto iterator =
- plain_tokenizer
- ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator)
- .ValueOrDie();
+ auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
EXPECT_TRUE(iterator->Advance());
EXPECT_THAT(iterator->GetTokens(),
ElementsAre(EqualsToken(Token::Type::REGULAR, "foo")));
diff --git a/icing/tokenization/raw-query-tokenizer.cc b/icing/tokenization/raw-query-tokenizer.cc
index aca317c..1dcbf9b 100644
--- a/icing/tokenization/raw-query-tokenizer.cc
+++ b/icing/tokenization/raw-query-tokenizer.cc
@@ -690,8 +690,7 @@ class RawQueryTokenIterator : public Tokenizer::Iterator {
} // namespace
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>>
-RawQueryTokenizer::Tokenize(std::string_view text,
- LanguageSegmenter::AccessType) const {
+RawQueryTokenizer::Tokenize(std::string_view text) const {
ICING_ASSIGN_OR_RETURN(std::vector<Token> tokens, TokenizeAll(text));
return std::make_unique<RawQueryTokenIterator>(std::move(tokens));
}
diff --git a/icing/tokenization/raw-query-tokenizer.h b/icing/tokenization/raw-query-tokenizer.h
index 1087b04..6316e45 100644
--- a/icing/tokenization/raw-query-tokenizer.h
+++ b/icing/tokenization/raw-query-tokenizer.h
@@ -33,7 +33,7 @@ class RawQueryTokenizer : public Tokenizer {
: language_segmenter_(*language_segmenter) {}
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize(
- std::string_view text, LanguageSegmenter::AccessType) const override;
+ std::string_view text) const override;
libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll(
std::string_view text) const override;
diff --git a/icing/tokenization/raw-query-tokenizer_test.cc b/icing/tokenization/raw-query-tokenizer_test.cc
index 2044f95..a00f2f7 100644
--- a/icing/tokenization/raw-query-tokenizer_test.cc
+++ b/icing/tokenization/raw-query-tokenizer_test.cc
@@ -21,7 +21,6 @@
#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
-#include "icing/tokenization/language-segmenter.h"
#include "icing/tokenization/tokenizer-factory.h"
#include "icing/tokenization/tokenizer.h"
#include "unicode/uloc.h"
@@ -61,10 +60,8 @@ TEST_F(RawQueryTokenizerTest, NoTokensBeforeAdvancing) {
language_segmenter.get()));
constexpr std::string_view kText = "Hello, world!";
- ICING_ASSERT_OK_AND_ASSIGN(
- auto token_iterator,
- raw_query_tokenizer->Tokenize(
- kText, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto token_iterator,
+ raw_query_tokenizer->Tokenize(kText));
// We should get no tokens if we get the token before advancing.
EXPECT_THAT(token_iterator->GetTokens(), IsEmpty());
diff --git a/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc
index 4bb7991..dbd7f5a 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc
@@ -31,13 +31,18 @@
namespace icing {
namespace lib {
+namespace {
+// Chosen based on results in go/reverse-jni-benchmarks
+static constexpr int kBatchSize = 100;
+} // namespace
+
// -----------------------------------------------------------------------------
// Implementations that call out to JVM. Behold the beauty.
// -----------------------------------------------------------------------------
libtextclassifier3::StatusOr<std::unique_ptr<ReverseJniBreakIterator>>
ReverseJniBreakIterator::Create(const JniCache* jni_cache,
- std::string_view text, std::string_view locale,
- int batch_size) {
+ std::string_view text,
+ std::string_view locale) {
if (jni_cache == nullptr) {
return absl_ports::InvalidArgumentError(
"Create must be called with a valid JniCache pointer!");
@@ -85,17 +90,15 @@ ReverseJniBreakIterator::Create(const JniCache* jni_cache,
ICING_RETURN_IF_ERROR(libtextclassifier3::JniHelper::CallVoidMethod(
jenv, iterator_batcher.get(), jni_cache->breakiterator_settext,
java_text.get()));
- return std::unique_ptr<ReverseJniBreakIterator>(new ReverseJniBreakIterator(
- jni_cache, std::move(iterator_batcher), batch_size));
+ return std::unique_ptr<ReverseJniBreakIterator>(
+ new ReverseJniBreakIterator(jni_cache, std::move(iterator_batcher)));
}
ReverseJniBreakIterator::ReverseJniBreakIterator(
const JniCache* jni_cache,
- libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher,
- int batch_size)
+ libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher)
: jni_cache_(jni_cache),
iterator_batcher_(std::move(iterator_batcher)),
- batch_size_(batch_size),
is_done_(false),
is_almost_done_(false) {}
@@ -110,7 +113,7 @@ int ReverseJniBreakIterator::Next() {
is_done_ = true;
return ReverseJniBreakIterator::kDone;
}
- is_almost_done_ = break_indices_cache_.size() < batch_size_;
+ is_almost_done_ = break_indices_cache_.size() < kBatchSize;
}
int break_index = break_indices_cache_.front();
break_indices_cache_.pop();
@@ -153,7 +156,7 @@ int ReverseJniBreakIterator::FetchNextBatch() {
libtextclassifier3::ScopedLocalRef<jintArray> break_indices,
libtextclassifier3::JniHelper::CallObjectMethod<jintArray>(
jni_cache_->GetEnv(), iterator_batcher_.get(),
- jni_cache_->breakiterator_next, batch_size_),
+ jni_cache_->breakiterator_next, kBatchSize),
ReverseJniBreakIterator::kDone);
if (break_indices == nullptr || jni_cache_->ExceptionCheckAndClear()) {
return ReverseJniBreakIterator::kDone;
diff --git a/icing/tokenization/reverse_jni/reverse-jni-break-iterator.h b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.h
index b1dcc87..537666c 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-break-iterator.h
+++ b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.h
@@ -44,9 +44,6 @@ namespace lib {
// EXPECT_THAT(nexts, ElementsAre(1, 3, 5, 6, 8));
class ReverseJniBreakIterator {
public:
- // Chosen based on results in go/reverse-jni-benchmarks
- static constexpr int kBatchSize = 100;
-
static constexpr int kDone = -1;
// Creates a ReverseJniBreakiterator with the given text and locale.
@@ -57,7 +54,7 @@ class ReverseJniBreakIterator {
// INTERNAL if unable to create any of the required Java objects
static libtextclassifier3::StatusOr<std::unique_ptr<ReverseJniBreakIterator>>
Create(const JniCache* jni_cache, std::string_view text,
- std::string_view locale, int batch_size);
+ std::string_view locale);
// Returns the UTF-16 boundary following the current boundary. If the current
// boundary is the last text boundary, it returns
@@ -91,10 +88,9 @@ class ReverseJniBreakIterator {
private:
ReverseJniBreakIterator(
const JniCache* jni_cache,
- libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher,
- int batch_size);
+ libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher);
- // Fetches the results of up to batch_size next calls and stores them in
+ // Fetches the results of up to kBatchSize next calls and stores them in
// break_indices_cache_. Returns the number of results or kDone if no more
// results could be fetched.
int FetchNextBatch();
@@ -113,11 +109,9 @@ class ReverseJniBreakIterator {
// BreakIteratorBatcher#next.
std::queue<int> break_indices_cache_;
- int batch_size_;
-
bool is_done_;
- // The last batch was incomplete (< batch_size_ results were returned). The
+ // The last batch was incomplete (< kBatchSize results were returned). The
// next call to BreakIteratorBatcher#next is guaranteed to return an
// empty array. Once the results from the last batch are evicted from
// break_indices_cache, ReverseJniBreakIterator will transition to is_done_.
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc
index e6bcf4b..bd80718 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc
@@ -293,28 +293,18 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
};
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
-ReverseJniLanguageSegmenter::Segment(
- const std::string_view text,
- LanguageSegmenter::AccessType access_type) const {
- // Only batch if we're only doing forward iteration. Bidirectional iteration
- // will result in us frequently discarding unconsumed batched word breaks.
- // Therefore, we won't bother batching them.
- int batch_size =
- (access_type == LanguageSegmenter::AccessType::kForwardIterator)
- ? ReverseJniBreakIterator::kBatchSize
- : 1;
+ReverseJniLanguageSegmenter::Segment(const std::string_view text) const {
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<ReverseJniBreakIterator> break_iterator,
- ReverseJniBreakIterator::Create(jni_cache_, text, locale_, batch_size));
+ ReverseJniBreakIterator::Create(jni_cache_, text, locale_));
return std::make_unique<ReverseJniLanguageSegmenterIterator>(
text, std::move(break_iterator));
}
libtextclassifier3::StatusOr<std::vector<std::string_view>>
ReverseJniLanguageSegmenter::GetAllTerms(const std::string_view text) const {
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<LanguageSegmenter::Iterator> iterator,
- Segment(text, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<LanguageSegmenter::Iterator> iterator,
+ Segment(text));
std::vector<std::string_view> terms;
while (iterator->Advance()) {
terms.push_back(iterator->GetTerm());
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h
index e9f84ad..29df4ee 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h
@@ -34,8 +34,7 @@ class ReverseJniLanguageSegmenter : public LanguageSegmenter {
: locale_(std::move(locale)), jni_cache_(jni_cache) {}
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
- Segment(std::string_view text,
- LanguageSegmenter::AccessType access_type) const override;
+ Segment(std::string_view text) const override;
libtextclassifier3::StatusOr<std::vector<std::string_view>> GetAllTerms(
std::string_view text) const override;
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc
index be652ff..47a01fe 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc
@@ -394,10 +394,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ContinuousWhitespaces) {
// iterator is done.
text_with_spaces = absl_ports::StrCat(std::string(kNumSeparators, ' '),
"Hello", " ", "World");
- ICING_ASSERT_OK_AND_ASSIGN(
- auto itr,
- language_segmenter->Segment(
- text_with_spaces, LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(auto itr,
+ language_segmenter->Segment(text_with_spaces));
std::vector<std::string_view> terms;
while (itr->Advance()) {
terms.push_back(itr->GetTerm());
@@ -493,10 +491,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToStartUtf32WordConnector) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "com:google:android is package";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(
- kText, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "com:google:android is package"
// ^ ^^ ^^
@@ -512,10 +508,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, NewIteratorResetToStartUtf32) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(
- kText, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -530,10 +524,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, IteratorOneAdvanceResetToStartUtf32) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(
- kText, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -550,10 +542,8 @@ TEST_P(ReverseJniLanguageSegmenterTest,
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(
- kText, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -572,10 +562,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, IteratorDoneResetToStartUtf32) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(
- kText, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -593,10 +581,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterUtf32WordConnector) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "package com:google:android name";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(
- kText, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "package com:google:android name"
// ^ ^^ ^^
@@ -618,10 +604,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterUtf32OutOfBounds) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(
- kText, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -651,15 +635,13 @@ TEST_P(ReverseJniLanguageSegmenterTest,
constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kText));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(
- kText, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ segmenter->Segment(kText));
std::vector<std::string_view> reset_terms =
GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
@@ -675,15 +657,13 @@ TEST_P(ReverseJniLanguageSegmenterTest,
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kThai,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kThai));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(
- kThai, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ segmenter->Segment(kThai));
std::vector<std::string_view> reset_terms =
GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
@@ -699,15 +679,13 @@ TEST_P(ReverseJniLanguageSegmenterTest,
constexpr std::string_view kKorean = "나는 매일 출근합니다.";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kKorean,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kKorean));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(
- kKorean, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ segmenter->Segment(kKorean));
std::vector<std::string_view> reset_terms =
GetAllTermsResetAfterUtf32(reset_to_term_itr.get());
@@ -727,15 +705,13 @@ TEST_P(ReverseJniLanguageSegmenterTest,
constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kText));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
- segmenter->Segment(
- kText, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ segmenter->Segment(kText));
std::vector<std::string_view> advance_and_reset_terms =
GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
@@ -752,15 +728,13 @@ TEST_P(ReverseJniLanguageSegmenterTest,
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kThai,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kThai));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
- segmenter->Segment(
- kThai, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ segmenter->Segment(kThai));
std::vector<std::string_view> advance_and_reset_terms =
GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
@@ -777,15 +751,13 @@ TEST_P(ReverseJniLanguageSegmenterTest,
constexpr std::string_view kKorean = "나는 매일 출근합니다.";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kKorean,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kKorean));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr,
- segmenter->Segment(
- kKorean, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ segmenter->Segment(kKorean));
std::vector<std::string_view> advance_and_reset_terms =
GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get());
@@ -801,9 +773,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermAfterUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- "How are you你好吗お元気ですか",
- LanguageSegmenter::AccessType::kBidirectionalIterator));
+ language_segmenter->Segment("How are you你好吗お元気ですか"));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -840,11 +810,8 @@ TEST_P(ReverseJniLanguageSegmenterTest,
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Multiple continuous whitespaces are treated as one.
constexpr std::string_view kTextWithSpace = "Hello World";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kTextWithSpace,
- LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kTextWithSpace));
// String: "Hello World"
// ^ ^ ^
@@ -883,10 +850,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermAfterUtf32) {
// CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that
// don't have whitespaces as word delimiter. Chinese
constexpr std::string_view kChinese = "我每天走路去上班。";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kChinese, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kChinese));
// String: "我每天走路去上班。"
// ^ ^ ^ ^^ ^
// UTF-8 idx: 0 3 9 15 18 24
@@ -912,10 +877,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermAfterUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Japanese
constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kJapanese, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kJapanese));
// String: "私は毎日仕事に歩いています。"
// ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
// UTF-8 idx: 0 3 6 12 18212427 33 39
@@ -940,10 +903,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermAfterUtf32) {
language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kKhmer, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kKhmer));
// String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
// ^ ^ ^ ^ ^
// UTF-8 idx: 0 9 24 45 69
@@ -969,10 +930,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermAfterUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Thai
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kThai, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kThai));
// String: "ฉันเดินไปทำงานทุกวัน"
// ^ ^ ^ ^ ^ ^
// UTF-8 idx: 0 9 21 27 42 51
@@ -996,10 +955,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeWordConnectorUtf32) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "package name com:google:android!";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(
- kText, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "package name com:google:android!"
// ^ ^^ ^^ ^
@@ -1021,10 +978,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeOutOfBoundsUtf32) {
auto segmenter, language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kText = "How are you你好吗お元気ですか";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- segmenter->Segment(
- kText, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -1054,15 +1009,13 @@ TEST_P(ReverseJniLanguageSegmenterTest,
constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kText,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kText));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(
- kText, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ segmenter->Segment(kText));
std::vector<std::string_view> reset_terms =
GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
@@ -1080,15 +1033,13 @@ TEST_P(ReverseJniLanguageSegmenterTest,
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kThai,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kThai));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(
- kThai, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ segmenter->Segment(kThai));
std::vector<std::string_view> reset_terms =
GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
@@ -1105,15 +1056,13 @@ TEST_P(ReverseJniLanguageSegmenterTest,
constexpr std::string_view kKorean = "나는 매일 출근합니다.";
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> advance_itr,
- segmenter->Segment(kKorean,
- LanguageSegmenter::AccessType::kForwardIterator));
+ segmenter->Segment(kKorean));
std::vector<std::string_view> advance_terms =
GetAllTermsAdvance(advance_itr.get());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr,
- segmenter->Segment(
- kKorean, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ segmenter->Segment(kKorean));
std::vector<std::string_view> reset_terms =
GetAllTermsResetBeforeUtf32(reset_to_term_itr.get());
std::reverse(reset_terms.begin(), reset_terms.end());
@@ -1129,9 +1078,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermBeforeUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- "How are you你好吗お元気ですか",
- LanguageSegmenter::AccessType::kBidirectionalIterator));
+ language_segmenter->Segment("How are you你好吗お元気ですか"));
// String: "How are you你好吗お元気ですか"
// ^ ^^ ^^ ^ ^ ^ ^ ^ ^
@@ -1169,11 +1116,8 @@ TEST_P(ReverseJniLanguageSegmenterTest,
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Multiple continuous whitespaces are treated as one.
constexpr std::string_view kTextWithSpace = "Hello World";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kTextWithSpace,
- LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kTextWithSpace));
// String: "Hello World"
// ^ ^ ^
@@ -1211,10 +1155,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermBeforeUtf32) {
// CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that
// don't have whitespaces as word delimiter. Chinese
constexpr std::string_view kChinese = "我每天走路去上班。";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kChinese, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kChinese));
// String: "我每天走路去上班。"
// ^ ^ ^ ^^
// UTF-8 idx: 0 3 9 15 18
@@ -1237,10 +1179,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermBeforeUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Japanese
constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kJapanese, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kJapanese));
// String: "私は毎日仕事に歩いています。"
// ^ ^ ^ ^ ^ ^ ^ ^ ^
// UTF-8 idx: 0 3 6 12 18212427 33
@@ -1262,10 +1202,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermBeforeUtf32) {
language_segmenter_factory::Create(
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kKhmer, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kKhmer));
// String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"
// ^ ^ ^ ^
// UTF-8 idx: 0 9 24 45
@@ -1288,10 +1226,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermBeforeUtf32) {
GetSegmenterOptions(GetLocale(), jni_cache_.get())));
// Thai
constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน";
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<LanguageSegmenter::Iterator> itr,
- language_segmenter->Segment(
- kThai, LanguageSegmenter::AccessType::kBidirectionalIterator));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ language_segmenter->Segment(kThai));
// String: "ฉันเดินไปทำงานทุกวัน"
// ^ ^ ^ ^ ^ ^
// UTF-8 idx: 0 9 21 27 42 51
diff --git a/icing/tokenization/rfc822-tokenizer.cc b/icing/tokenization/rfc822-tokenizer.cc
index 35b82ca..13c58c5 100644
--- a/icing/tokenization/rfc822-tokenizer.cc
+++ b/icing/tokenization/rfc822-tokenizer.cc
@@ -778,15 +778,14 @@ class Rfc822TokenIterator : public Tokenizer::Iterator {
};
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>>
-Rfc822Tokenizer::Tokenize(std::string_view text,
- LanguageSegmenter::AccessType) const {
+Rfc822Tokenizer::Tokenize(std::string_view text) const {
return std::make_unique<Rfc822TokenIterator>(text);
}
libtextclassifier3::StatusOr<std::vector<Token>> Rfc822Tokenizer::TokenizeAll(
std::string_view text) const {
- std::unique_ptr<Tokenizer::Iterator> iterator =
- std::make_unique<Rfc822TokenIterator>(text);
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator,
+ Tokenize(text));
std::vector<Token> tokens;
while (iterator->Advance()) {
std::vector<Token> batch_tokens = iterator->GetTokens();
diff --git a/icing/tokenization/rfc822-tokenizer.h b/icing/tokenization/rfc822-tokenizer.h
index 094f1cf..09e4624 100644
--- a/icing/tokenization/rfc822-tokenizer.h
+++ b/icing/tokenization/rfc822-tokenizer.h
@@ -17,7 +17,6 @@
#include <vector>
-#include "icing/tokenization/language-segmenter.h"
#include "icing/tokenization/tokenizer.h"
namespace icing {
@@ -26,7 +25,7 @@ namespace lib {
class Rfc822Tokenizer : public Tokenizer {
public:
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize(
- std::string_view text, LanguageSegmenter::AccessType) const override;
+ std::string_view text) const override;
libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll(
std::string_view text) const override;
diff --git a/icing/tokenization/rfc822-tokenizer_test.cc b/icing/tokenization/rfc822-tokenizer_test.cc
index e1a7fc8..ee3a95d 100644
--- a/icing/tokenization/rfc822-tokenizer_test.cc
+++ b/icing/tokenization/rfc822-tokenizer_test.cc
@@ -21,7 +21,6 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/testing/common-matchers.h"
-#include "icing/tokenization/language-segmenter.h"
namespace icing {
namespace lib {
@@ -32,10 +31,7 @@ using ::testing::IsEmpty;
TEST(Rfc822TokenizerTest, StartingState) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "a@g.c";
- auto token_iterator =
- rfc822_tokenizer
- .Tokenize(text, LanguageSegmenter::AccessType::kForwardIterator)
- .ValueOrDie();
+ auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie();
ASSERT_THAT(token_iterator->GetTokens(), IsEmpty());
ASSERT_TRUE(token_iterator->Advance());
@@ -966,10 +962,7 @@ TEST(Rfc822TokenizerTest, Commas) {
TEST(Rfc822TokenizerTest, ResetToTokenStartingAfter) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "a@g.c,b@g.c";
- auto token_iterator =
- rfc822_tokenizer
- .Tokenize(text, LanguageSegmenter::AccessType::kBidirectionalIterator)
- .ValueOrDie();
+ auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie();
ASSERT_TRUE(token_iterator->Advance());
ASSERT_TRUE(token_iterator->Advance());
@@ -985,10 +978,7 @@ TEST(Rfc822TokenizerTest, ResetToTokenStartingAfter) {
TEST(Rfc822TokenizerTest, ResetToTokenEndingBefore) {
Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer();
std::string text = "a@g.c,b@g.c";
- auto token_iterator =
- rfc822_tokenizer
- .Tokenize(text, LanguageSegmenter::AccessType::kBidirectionalIterator)
- .ValueOrDie();
+ auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie();
token_iterator->Advance();
ASSERT_TRUE(token_iterator->ResetToTokenEndingBefore(5));
diff --git a/icing/tokenization/tokenizer.h b/icing/tokenization/tokenizer.h
index 3336266..fb7613f 100644
--- a/icing/tokenization/tokenizer.h
+++ b/icing/tokenization/tokenizer.h
@@ -22,7 +22,6 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
-#include "icing/tokenization/language-segmenter.h"
#include "icing/tokenization/token.h"
#include "icing/util/character-iterator.h"
@@ -34,10 +33,8 @@ namespace lib {
// iterator or a list of tokens. Example usage:
//
// std::unique_ptr<Tokenizer> tokenizer = GetTokenizer();
-// ICING_ASSIGN_OR_RETURN(
-// std::unique_ptr<Tokenizer::Iterator> iter,
-// tokenizer->Tokenize(text,
-// LanguageSegmenter::AccessType::kForwardIterator));
+// ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iter,
+// tokenizer->Tokenize(text));
// ICING_ASSIGN_OR_RETURN(std::vector<Token> tokens,
// tokenizer->TokenizeAll(text));
class Tokenizer {
@@ -79,10 +76,7 @@ class Tokenizer {
// offset. Returns false if there are no valid tokens starting after
// offset.
// Ex.
- // auto iterator =
- // tokenizer.Tokenize("foo bar baz",
- // LanguageSegmenter::AccessType::kForwardIterator)
- // .ValueOrDie();
+ // auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie();
// iterator.ResetToTokenStartingAfter(4);
// // The first full token starting after position 4 (the 'b' in "bar") is
// // "baz".
@@ -95,10 +89,8 @@ class Tokenizer {
// offset. Returns false if there are no valid tokens ending
// before offset.
// Ex.
- // auto iterator =
- // tokenizer.Tokenize("foo bar baz",
- // LanguageSegmenter::AccessType::kForwardIterator)
- // .ValueOrDie(); // iterator.ResetToTokenEndingBefore(4);
+ // auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie();
+ // iterator.ResetToTokenEndingBefore(4);
// // The first full token ending before position 4 (the 'b' in "bar") is
// // "foo".
// PrintToken(iterator.GetToken()); // prints "foo"
@@ -119,8 +111,7 @@ class Tokenizer {
// types.
// INTERNAL_ERROR if any other errors occur
virtual libtextclassifier3::StatusOr<std::unique_ptr<Iterator>> Tokenize(
- std::string_view text,
- LanguageSegmenter::AccessType access_type) const = 0;
+ std::string_view text) const = 0;
// Tokenizes and returns all tokens in the input text. The input text should
// outlive the returned vector.
diff --git a/icing/tokenization/verbatim-tokenizer.cc b/icing/tokenization/verbatim-tokenizer.cc
index cf6d5e3..9ca611d 100644
--- a/icing/tokenization/verbatim-tokenizer.cc
+++ b/icing/tokenization/verbatim-tokenizer.cc
@@ -124,15 +124,14 @@ class VerbatimTokenIterator : public Tokenizer::Iterator {
};
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>>
-VerbatimTokenizer::Tokenize(std::string_view text,
- LanguageSegmenter::AccessType) const {
+VerbatimTokenizer::Tokenize(std::string_view text) const {
return std::make_unique<VerbatimTokenIterator>(text);
}
libtextclassifier3::StatusOr<std::vector<Token>> VerbatimTokenizer::TokenizeAll(
std::string_view text) const {
- std::unique_ptr<Tokenizer::Iterator> iterator =
- std::make_unique<VerbatimTokenIterator>(text);
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator,
+ Tokenize(text));
std::vector<Token> tokens;
while (iterator->Advance()) {
std::vector<Token> batch = iterator->GetTokens();
diff --git a/icing/tokenization/verbatim-tokenizer.h b/icing/tokenization/verbatim-tokenizer.h
index c3746af..8404cf1 100644
--- a/icing/tokenization/verbatim-tokenizer.h
+++ b/icing/tokenization/verbatim-tokenizer.h
@@ -20,7 +20,6 @@
#include <vector>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/tokenization/language-segmenter.h"
#include "icing/tokenization/tokenizer.h"
namespace icing {
@@ -30,7 +29,7 @@ namespace lib {
class VerbatimTokenizer : public Tokenizer {
public:
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize(
- std::string_view text, LanguageSegmenter::AccessType) const override;
+ std::string_view text) const override;
libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll(
std::string_view text) const override;
diff --git a/icing/tokenization/verbatim-tokenizer_test.cc b/icing/tokenization/verbatim-tokenizer_test.cc
index 5aeb343..bae69ff 100644
--- a/icing/tokenization/verbatim-tokenizer_test.cc
+++ b/icing/tokenization/verbatim-tokenizer_test.cc
@@ -22,7 +22,6 @@
#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
-#include "icing/tokenization/language-segmenter.h"
#include "icing/tokenization/token.h"
#include "icing/tokenization/tokenizer-factory.h"
#include "icing/util/character-iterator.h"
@@ -95,10 +94,7 @@ TEST_F(VerbatimTokenizerTest, NoTokensBeforeAdvancing) {
language_segmenter_.get()));
constexpr std::string_view kText = "Hello, world!";
- auto token_iterator =
- verbatim_tokenizer
- ->Tokenize(kText, LanguageSegmenter::AccessType::kForwardIterator)
- .ValueOrDie();
+ auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
// We should get no tokens if we get the token before advancing.
EXPECT_THAT(token_iterator->GetTokens(), IsEmpty());
@@ -111,10 +107,7 @@ TEST_F(VerbatimTokenizerTest, ResetToTokenEndingBefore) {
language_segmenter_.get()));
constexpr std::string_view kText = "Hello, world!";
- auto token_iterator =
- verbatim_tokenizer
- ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator)
- .ValueOrDie();
+ auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
// Reset to beginning of verbatim of token. We provide an offset of 13 as it
// is larger than the final index (12) of the verbatim token.
@@ -141,10 +134,7 @@ TEST_F(VerbatimTokenizerTest, ResetToTokenStartingAfter) {
language_segmenter_.get()));
constexpr std::string_view kText = "Hello, world!";
- auto token_iterator =
- verbatim_tokenizer
- ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator)
- .ValueOrDie();
+ auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
// Get token without resetting
EXPECT_TRUE(token_iterator->Advance());
@@ -169,10 +159,7 @@ TEST_F(VerbatimTokenizerTest, ResetToStart) {
language_segmenter_.get()));
constexpr std::string_view kText = "Hello, world!";
- auto token_iterator =
- verbatim_tokenizer
- ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator)
- .ValueOrDie();
+ auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
// Get token without resetting
EXPECT_TRUE(token_iterator->Advance());
@@ -192,10 +179,7 @@ TEST_F(VerbatimTokenizerTest, CalculateTokenStart) {
language_segmenter_.get()));
constexpr std::string_view kText = "Hello, world!";
- auto token_iterator =
- verbatim_tokenizer
- ->Tokenize(kText, LanguageSegmenter::AccessType::kForwardIterator)
- .ValueOrDie();
+ auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
ICING_ASSERT_OK_AND_ASSIGN(CharacterIterator start_character_iterator,
token_iterator->CalculateTokenStart());
@@ -211,10 +195,7 @@ TEST_F(VerbatimTokenizerTest, CalculateTokenEnd) {
language_segmenter_.get()));
constexpr std::string_view kText = "Hello, world!";
- auto token_iterator =
- verbatim_tokenizer
- ->Tokenize(kText, LanguageSegmenter::AccessType::kForwardIterator)
- .ValueOrDie();
+ auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
ICING_ASSERT_OK_AND_ASSIGN(CharacterIterator end_character_iterator,
token_iterator->CalculateTokenEndExclusive());
diff --git a/icing/util/document-validator.cc b/icing/util/document-validator.cc
index ca15ee3..9d5fea7 100644
--- a/icing/util/document-validator.cc
+++ b/icing/util/document-validator.cc
@@ -151,15 +151,19 @@ libtextclassifier3::Status DocumentValidator::Validate(
// fail, we don't need to validate the extra documents.
if (property_config.data_type() ==
PropertyConfigProto::DataType::DOCUMENT) {
- const std::string_view nested_type_expected =
- property_config.schema_type();
+ ICING_ASSIGN_OR_RETURN(
+ const std::unordered_set<SchemaTypeId>* nested_type_ids_expected,
+ schema_store_->GetSchemaTypeIdsWithChildren(
+ property_config.schema_type()));
for (const DocumentProto& nested_document : property.document_values()) {
- if (nested_type_expected.compare(nested_document.schema()) != 0) {
+ libtextclassifier3::StatusOr<SchemaTypeId> nested_document_type_id_or =
+ schema_store_->GetSchemaTypeId(nested_document.schema());
+ if (!nested_document_type_id_or.ok() ||
+ nested_type_ids_expected->count(
+ nested_document_type_id_or.ValueOrDie()) == 0) {
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "Property '", property.name(), "' should have type '",
- nested_type_expected,
- "' but actual "
- "value has type '",
+ "Property '", property.name(), "' should be type or subtype of '",
+ property_config.schema_type(), "' but actual value has type '",
nested_document.schema(), "' for key: (", document.namespace_(),
", ", document.uri(), ")."));
}
diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc
index 310494a..9d10b36 100644
--- a/icing/util/document-validator_test.cc
+++ b/icing/util/document-validator_test.cc
@@ -35,13 +35,16 @@ namespace {
using ::testing::HasSubstr;
-// type and property names of EmailMessage
+// type and property names of EmailMessage and EmailMessageWithNote
constexpr char kTypeEmail[] = "EmailMessage";
+constexpr char kTypeEmailWithNote[] = "EmailMessageWithNote";
constexpr char kPropertySubject[] = "subject";
constexpr char kPropertyText[] = "text";
constexpr char kPropertyRecipients[] = "recipients";
+constexpr char kPropertyNote[] = "note";
// type and property names of Conversation
constexpr char kTypeConversation[] = "Conversation";
+constexpr char kTypeConversationWithEmailNote[] = "ConversationWithEmailNote";
constexpr char kPropertyName[] = "name";
constexpr char kPropertyEmails[] = "emails";
// Other values
@@ -72,6 +75,26 @@ class DocumentValidatorTest : public ::testing::Test {
.SetCardinality(CARDINALITY_REPEATED)))
.AddType(
SchemaTypeConfigBuilder()
+ .SetType(kTypeEmailWithNote)
+ .AddParentType(kTypeEmail)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyRecipients)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyNote)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
.SetType(kTypeConversation)
.AddProperty(PropertyConfigBuilder()
.SetName(kPropertyName)
@@ -83,6 +106,19 @@ class DocumentValidatorTest : public ::testing::Test {
.SetDataTypeDocument(
kTypeEmail, /*index_nested_properties=*/true)
.SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kTypeConversationWithEmailNote)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(
+ kTypeEmailWithNote,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
.Build();
schema_dir_ = GetTestTempDir() + "/schema_store";
@@ -90,13 +126,16 @@ class DocumentValidatorTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, schema_dir_, &fake_clock_));
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
document_validator_ =
std::make_unique<DocumentValidator>(schema_store_.get());
}
- static DocumentBuilder SimpleEmailBuilder() {
+ DocumentBuilder SimpleEmailBuilder() {
return DocumentBuilder()
.SetKey(kDefaultNamespace, "email/1")
.SetSchema(kTypeEmail)
@@ -106,7 +145,18 @@ class DocumentValidatorTest : public ::testing::Test {
kDefaultString);
}
- static DocumentBuilder SimpleConversationBuilder() {
+ DocumentBuilder SimpleEmailWithNoteBuilder() {
+ return DocumentBuilder()
+ .SetKey(kDefaultNamespace, "email_with_note/1")
+ .SetSchema(kTypeEmailWithNote)
+ .AddStringProperty(kPropertySubject, kDefaultString)
+ .AddStringProperty(kPropertyText, kDefaultString)
+ .AddStringProperty(kPropertyRecipients, kDefaultString, kDefaultString,
+ kDefaultString)
+ .AddStringProperty(kPropertyNote, kDefaultString);
+ }
+
+ DocumentBuilder SimpleConversationBuilder() {
return DocumentBuilder()
.SetKey(kDefaultNamespace, "conversation/1")
.SetSchema(kTypeConversation)
@@ -299,10 +349,82 @@ TEST_F(DocumentValidatorTest,
SimpleEmailBuilder().Build())
.Build();
- EXPECT_THAT(document_validator_->Validate(conversation),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("'emails' should have type 'EmailMessage' but "
- "actual value has type 'Conversation'")));
+ EXPECT_THAT(
+ document_validator_->Validate(conversation),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("'emails' should be type or subtype of 'EmailMessage' "
+ "but actual value has type 'Conversation'")));
+}
+
+TEST_F(DocumentValidatorTest, ValidateNestedPropertyMatchSubtypeOk) {
+ DocumentProto conversation =
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "conversation/1")
+ .SetSchema(kTypeConversation)
+ .AddStringProperty(kPropertyName, kDefaultString)
+ .AddDocumentProperty(kPropertyEmails, SimpleEmailBuilder().Build(),
+ // This is a subtype, which is ok.
+ SimpleEmailWithNoteBuilder().Build(),
+ SimpleEmailBuilder().Build())
+ .Build();
+
+ EXPECT_THAT(document_validator_->Validate(conversation), IsOk());
+}
+
+TEST_F(DocumentValidatorTest, ValidateNestedPropertyNonexistentTypeInvalid) {
+ DocumentProto conversation =
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "conversation/1")
+ .SetSchema(kTypeConversation)
+ .AddStringProperty(kPropertyName, kDefaultString)
+ .AddDocumentProperty(
+ kPropertyEmails, SimpleEmailBuilder().Build(),
+ // Nonexistent type is not allowed
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "email_with_note/1")
+ .SetSchema("Nonexistent")
+ .Build(),
+ SimpleEmailBuilder().Build())
+ .Build();
+
+ EXPECT_THAT(
+ document_validator_->Validate(conversation),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("'emails' should be type or subtype of 'EmailMessage' "
+ "but actual value has type 'Nonexistent'")));
+}
+
+TEST_F(DocumentValidatorTest, ValidateNestedPropertyMatchSuperTypeInvalid) {
+ DocumentProto conversation1 =
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "conversation_with_email_note/1")
+ .SetSchema(kTypeConversationWithEmailNote)
+ .AddStringProperty(kPropertyName, kDefaultString)
+ .AddDocumentProperty(kPropertyEmails,
+ SimpleEmailWithNoteBuilder().Build(),
+ SimpleEmailWithNoteBuilder().Build(),
+ SimpleEmailWithNoteBuilder().Build())
+ .Build();
+ EXPECT_THAT(document_validator_->Validate(conversation1), IsOk());
+
+ DocumentProto conversation2 =
+ DocumentBuilder()
+ .SetKey(kDefaultNamespace, "conversation_with_email_note/2")
+ .SetSchema(kTypeConversationWithEmailNote)
+ .AddStringProperty(kPropertyName, kDefaultString)
+ .AddDocumentProperty(kPropertyEmails,
+ SimpleEmailWithNoteBuilder().Build(),
+ // This is a super type, which is not ok.
+ SimpleEmailBuilder().Build(),
+ SimpleEmailWithNoteBuilder().Build())
+ .Build();
+ EXPECT_THAT(
+ document_validator_->Validate(conversation2),
+ StatusIs(
+ libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr(
+ "'emails' should be type or subtype of 'EmailMessageWithNote' "
+ "but actual value has type 'EmailMessage'")));
}
TEST_F(DocumentValidatorTest, ValidateNestedPropertyInvalid) {
@@ -351,7 +473,10 @@ TEST_F(DocumentValidatorTest, HandleTypeConfigMapChangesOk) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, custom_schema_dir, &fake_clock_));
- ASSERT_THAT(schema_store->SetSchema(email_schema), IsOk());
+ ASSERT_THAT(schema_store->SetSchema(
+ email_schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
DocumentValidator document_validator(schema_store.get());
@@ -381,7 +506,11 @@ TEST_F(DocumentValidatorTest, HandleTypeConfigMapChangesOk) {
// DocumentValidator should be able to handle the SchemaStore getting updated
// separately
- ASSERT_THAT(schema_store->SetSchema(email_and_conversation_schema), IsOk());
+ ASSERT_THAT(
+ schema_store->SetSchema(email_and_conversation_schema,
+ /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
ICING_EXPECT_OK(document_validator.Validate(conversation));
}
diff --git a/icing/util/tokenized-document.cc b/icing/util/tokenized-document.cc
index 004181e..19aaddf 100644
--- a/icing/util/tokenized-document.cc
+++ b/icing/util/tokenized-document.cc
@@ -45,10 +45,8 @@ libtextclassifier3::StatusOr<std::vector<TokenizedSection>> Tokenize(
section.metadata.tokenizer, language_segmenter));
std::vector<std::string_view> token_sequence;
for (std::string_view subcontent : section.content) {
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<Tokenizer::Iterator> itr,
- tokenizer->Tokenize(subcontent,
- LanguageSegmenter::AccessType::kForwardIterator));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> itr,
+ tokenizer->Tokenize(subcontent));
while (itr->Advance()) {
std::vector<Token> batch_tokens = itr->GetTokens();
for (const Token& token : batch_tokens) {
diff --git a/icing/util/tokenized-document_test.cc b/icing/util/tokenized-document_test.cc
index c0b20bb..7c97776 100644
--- a/icing/util/tokenized-document_test.cc
+++ b/icing/util/tokenized-document_test.cc
@@ -44,9 +44,9 @@ namespace lib {
namespace {
+using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::ElementsAre;
using ::testing::Eq;
-using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::IsEmpty;
using ::testing::SizeIs;
@@ -168,7 +168,9 @@ class TokenizedDocumentTest : public ::testing::Test {
JOINABLE_VALUE_TYPE_QUALIFIED_ID)
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false));
}
void TearDown() override {
diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto
index db5dbed..18884c6 100644
--- a/proto/icing/proto/initialize.proto
+++ b/proto/icing/proto/initialize.proto
@@ -23,7 +23,7 @@ option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
-// Next tag: 8
+// Next tag: 9
message IcingSearchEngineOptions {
// Directory to persist files for Icing. Required.
// If Icing was previously initialized with this directory, it will reload
@@ -82,6 +82,20 @@ message IcingSearchEngineOptions {
// Optional.
optional int32 compression_level = 7 [default = 3];
+ // OPTIONAL: Whether to allow circular references between schema types for
+ // the schema definition.
+ //
+ // Even when set to true, circular references are still not allowed in the
+ // following cases:
+ // 1. All edges of a cycle have index_nested_properties=true
+ // 2. One of the types in the cycle has a joinable property, or depends on
+ // a type with a joinable property.
+ // This is because such a cycle would lead to an infinite number of
+ // indexed/joinable properties.
+ //
+ // The default value is false.
+ optional bool allow_circular_schema_definitions = 8;
+
reserved 2;
}
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
index 04f655d..ca795cd 100644
--- a/proto/icing/proto/logging.proto
+++ b/proto/icing/proto/logging.proto
@@ -49,6 +49,9 @@ message InitializeStatsProto {
// The document log is using legacy format.
LEGACY_DOCUMENT_LOG_FORMAT = 5;
+
+ // The current code version is different from the existing data version.
+ VERSION_CHANGED = 6;
}
// Possible recovery causes for document store:
@@ -114,7 +117,7 @@ message InitializeStatsProto {
}
// Stats of the top-level function IcingSearchEngine::Put().
-// Next tag: 7
+// Next tag: 10
message PutDocumentStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -122,8 +125,7 @@ message PutDocumentStatsProto {
// Time used to store the document.
optional int32 document_store_latency_ms = 2;
- // Time used to index the document. It does not include the time to merge
- // indices.
+ // Time used to index the document.
optional int32 index_latency_ms = 3;
// Time used to merge the indices.
@@ -139,6 +141,16 @@ message PutDocumentStatsProto {
reserved 2;
}
optional TokenizationStats tokenization_stats = 6;
+
+ // Time used to index all indexable string terms in the document. It does not
+ // include the time to merge indices.
+ optional int32 term_index_latency_ms = 7;
+
+ // Time used to index all indexable integers in the document.
+ optional int32 integer_index_latency_ms = 8;
+
+ // Time used to index all qualified id join strings in the document.
+ optional int32 qualified_id_join_index_latency_ms = 9;
}
// Stats of the top-level function IcingSearchEngine::Search() and
diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto
index dc625fc..b972ece 100644
--- a/proto/icing/proto/schema.proto
+++ b/proto/icing/proto/schema.proto
@@ -60,11 +60,11 @@ message SchemaTypeConfigProto {
// it will default to value == 0.
optional int32 version = 5;
- // An experimental field to make the type as a subtype of parent_type, which
- // enables parent_type to be interpreted as its subtypes in the context of the
- // Search APIs, including schema type filters and projections specified in
+ // An experimental field to make the type a subtype of parent_types, which
+ // enables parent_types to be interpreted as its subtypes in the context of
+ // the Search APIs, including schema type filters and projections specified in
// TypePropertyMask.
- optional string parent_type = 6;
+ repeated string parent_types = 6;
reserved 2, 3;
}
diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto
index 8bdbf0c..e5ad269 100644
--- a/proto/icing/proto/search.proto
+++ b/proto/icing/proto/search.proto
@@ -98,7 +98,7 @@ message SearchSpecProto {
// Client-supplied specifications on what to include/how to format the search
// results.
-// Next tag: 8
+// Next tag: 9
message ResultSpecProto {
// The results will be returned in pages, and num_per_page specifies the
// number of documents in one page.
@@ -199,6 +199,10 @@ message ResultSpecProto {
NAMESPACE_AND_SCHEMA_TYPE = 3;
}
optional ResultGroupingType result_group_type = 7;
+
+ // The max number of child documents that will be attached and returned in
+ // the result for each parent. It is only used by the join API.
+ optional int32 max_joined_children_per_parent_to_return = 8;
}
// The representation of a single match within a DocumentProto property.
@@ -499,7 +503,10 @@ message JoinSpecProto {
optional string child_property_expression = 3;
// The max number of child documents to join to a parent document.
- optional int32 max_joined_child_count = 4;
+ // DEPRECATED: use ResultSpecProto.max_joined_children_per_parent_to_return to
+ // control the number of children that are returned. There is no supported
+ // control for the number of children being scored at this time.
+ optional int32 max_joined_child_count = 4 [deprecated = true];
// The strategy by which to score the aggregation of child documents. For
// example, you might want to know which entity document has the most actions
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index 0d6bfb4..ae59ff7 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=524885330)
+set(synced_AOSP_CL_number=531296607)