diff options
author | Tim <tjbarron@google.com> | 2023-05-11 20:56:04 +0000 |
---|---|---|
committer | Tim <tjbarron@google.com> | 2023-05-11 21:23:49 +0000 |
commit | b3b664dc373ddd2ff1c004cdcafd5c04bf82bdd5 (patch) | |
tree | cc621f7731dc8af9b55ae2356c14fe3e125cd5d2 | |
parent | 72d892535dfa5dca1366ae7b34b29bcd236bc0c7 (diff) | |
parent | fb6eb3c7c025b798b13ae36b923aac8c6ebe24bd (diff) | |
download | icing-b3b664dc373ddd2ff1c004cdcafd5c04bf82bdd5.tar.gz |
Merge remote-tracking branch 'goog/upstream-master' into androidx-platform-dev
Update Icing from upstream.
Descriptions:
========================================================================
Handle version changes in the schema store.
========================================================================
Modify the definition of propertyDefined:
========================================================================
Remove default args in SchemaStore::SetSchema and fix calls
========================================================================
Add allow_circular_schema_definitions flag
========================================================================
Onboard version detection to Icing
========================================================================
Add version util to help read/write version info
========================================================================
Add support for the overlay schema.
========================================================================
Allow cycles in schema-property-iterator
========================================================================
Add joinable properties into schema definition cycle restrictions.
========================================================================
Loosen circular references restriction for Schema Definitions.
========================================================================
Implement BackupSchemaProducer to generate a backup schema
========================================================================
Minor fix: remove a redundant log
========================================================================
Allow schema types to inherit from more than one parent
========================================================================
allow nested document properties to accept documents of subtype
========================================================================
Support polymorphism for Icing projection in Search and Get API
========================================================================
Add max_joined_child_per_parent into ResultSpec and change behavior
========================================================================
Support Icing schema type polymorphism for the search filter API
========================================================================
Verify that every child type's property set has included all compatible properties from parent types
========================================================================
Add individual type index latency
========================================================================
Build the iterator node for the propertyDefined() custom function
========================================================================
Advance all hits with the same doc id and merge sections once for the same bucket iter
========================================================================
Introduce DocHitInfoIteratorPropertyInSchema for property existence check
========================================================================
Add SchemaUtil::BuildTransitiveInheritanceGraph to build an inheritance map from schema
========================================================================
Introduce a lookup method for a property defined in a schema
========================================================================
Rollback of: Allow LanguageSegmenter::Iterators to declare AccessType.
========================================================================
Adds join info to QueryStatsProto
========================================================================
Bug:280698419
Bug:280698125
Bug:280698121
Bug:280697513
Bug:276349029
Bug:272145329
Bug:270102295
Bug:269295094
Bug:268680462
Bug:265304217
Bug:259744228
Bug:259743562
Bug:256022027
* goog/upstream-master:
Update Icing from upstream.
Update Icing from upstream.
Change-Id: Ia9c5c88bf8e43122204acc8f5231fb8bf65019e3
127 files changed, 11433 insertions, 1608 deletions
diff --git a/icing/file/posting_list/flash-index-storage.cc b/icing/file/posting_list/flash-index-storage.cc index 2ba24a3..cd7ac12 100644 --- a/icing/file/posting_list/flash-index-storage.cc +++ b/icing/file/posting_list/flash-index-storage.cc @@ -52,6 +52,27 @@ libtextclassifier3::StatusOr<FlashIndexStorage> FlashIndexStorage::Create( return storage; } +/* static */ libtextclassifier3::StatusOr<int> +FlashIndexStorage::ReadHeaderMagic(const Filesystem* filesystem, + const std::string& index_filename) { + ICING_RETURN_ERROR_IF_NULL(filesystem); + + if (!filesystem->FileExists(index_filename.c_str())) { + return absl_ports::NotFoundError("Flash index file doesn't exist"); + } + + ScopedFd sfd(filesystem->OpenForRead(index_filename.c_str())); + if (!sfd.is_valid()) { + return absl_ports::InternalError("Fail to open flash index file"); + } + + uint32_t block_size = SelectBlockSize(); + // Read and validate header. + ICING_ASSIGN_OR_RETURN(HeaderBlock header_block, + HeaderBlock::Read(filesystem, sfd.get(), block_size)); + return header_block.header()->magic; +} + FlashIndexStorage::~FlashIndexStorage() { if (header_block_ != nullptr) { FlushInMemoryFreeList(); diff --git a/icing/file/posting_list/flash-index-storage.h b/icing/file/posting_list/flash-index-storage.h index 05feb08..378b2dc 100644 --- a/icing/file/posting_list/flash-index-storage.h +++ b/icing/file/posting_list/flash-index-storage.h @@ -98,6 +98,17 @@ class FlashIndexStorage { std::string index_filename, const Filesystem* filesystem, PostingListSerializer* serializer, bool in_memory = true); + // Reads magic from existing file header. We need this during Icing + // initialization phase to determine the version. 
+ // + // RETURNS: + // - On success, a valid magic + // - FAILED_PRECONDITION_ERROR if filesystem is null + // - NOT_FOUND_ERROR if the flash index file doesn't exist + // - INTERNAL_ERROR on I/O error + static libtextclassifier3::StatusOr<int> ReadHeaderMagic( + const Filesystem* filesystem, const std::string& index_filename); + FlashIndexStorage(FlashIndexStorage&&) = default; FlashIndexStorage(const FlashIndexStorage&) = delete; FlashIndexStorage& operator=(FlashIndexStorage&&) = default; diff --git a/icing/file/posting_list/flash-index-storage_test.cc b/icing/file/posting_list/flash-index-storage_test.cc index e63f5b0..3e2d239 100644 --- a/icing/file/posting_list/flash-index-storage_test.cc +++ b/icing/file/posting_list/flash-index-storage_test.cc @@ -26,6 +26,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/file/filesystem.h" +#include "icing/file/posting_list/flash-index-storage-header.h" #include "icing/index/hit/hit.h" #include "icing/index/main/posting-list-hit-serializer.h" #include "icing/store/document-id.h" @@ -42,6 +43,7 @@ using ::testing::Eq; using ::testing::IsEmpty; using ::testing::IsFalse; using ::testing::IsTrue; +using ::testing::Ne; using ::testing::Not; class FlashIndexStorageTest : public testing::Test { @@ -67,6 +69,50 @@ class FlashIndexStorageTest : public testing::Test { std::unique_ptr<PostingListHitSerializer> serializer_; }; +TEST_F(FlashIndexStorageTest, ReadHeaderMagic) { + { + ICING_ASSERT_OK_AND_ASSIGN( + FlashIndexStorage flash_index_storage, + FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get())); + } + EXPECT_THAT(FlashIndexStorage::ReadHeaderMagic(&filesystem_, file_name_), + IsOkAndHolds(HeaderBlock::Header::kMagic)); +} + +TEST_F(FlashIndexStorageTest, ReadHeaderMagicOldVersion) { + int block_size; + { + ICING_ASSERT_OK_AND_ASSIGN( + FlashIndexStorage flash_index_storage, + FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get())); + block_size = 
flash_index_storage.block_size(); + } + + int old_magic = 0x6dfba6ae; + ASSERT_THAT(old_magic, Ne(HeaderBlock::Header::kMagic)); + { + // Manually modify the header magic. + ScopedFd sfd(filesystem_.OpenForWrite(file_name_.c_str())); + ASSERT_THAT(sfd.is_valid(), IsTrue()); + + // Read and validate header. + ICING_ASSERT_OK_AND_ASSIGN( + HeaderBlock header_block, + HeaderBlock::Read(&filesystem_, sfd.get(), block_size)); + header_block.header()->magic = old_magic; + ASSERT_THAT(header_block.Write(sfd.get()), IsTrue()); + } + + EXPECT_THAT(FlashIndexStorage::ReadHeaderMagic(&filesystem_, file_name_), + IsOkAndHolds(old_magic)); +} + +TEST_F(FlashIndexStorageTest, + ReadHeaderMagicNonExistingFileShouldGetNotFoundError) { + EXPECT_THAT(FlashIndexStorage::ReadHeaderMagic(&filesystem_, file_name_), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); +} + TEST_F(FlashIndexStorageTest, CorruptHeader) { { // Create the header file diff --git a/icing/file/version-util.cc b/icing/file/version-util.cc new file mode 100644 index 0000000..468bde5 --- /dev/null +++ b/icing/file/version-util.cc @@ -0,0 +1,105 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/file/version-util.h" + +#include <cstdint> +#include <string> +#include <utility> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/file/filesystem.h" +#include "icing/index/index.h" + +namespace icing { +namespace lib { + +namespace version_util { + +libtextclassifier3::StatusOr<VersionInfo> ReadVersion( + const Filesystem& filesystem, const std::string& version_file_path, + const std::string& index_base_dir) { + // 1. Read the version info. + VersionInfo existing_version_info(-1, -1); + if (filesystem.FileExists(version_file_path.c_str()) && + !filesystem.PRead(version_file_path.c_str(), &existing_version_info, + sizeof(VersionInfo), /*offset=*/0)) { + return absl_ports::InternalError("Fail to read version"); + } + + // 2. Check the Index magic to see if we're actually on version 0. + libtextclassifier3::StatusOr<int> existing_flash_index_magic_or = + Index::ReadFlashIndexMagic(&filesystem, index_base_dir); + if (!existing_flash_index_magic_or.ok()) { + if (absl_ports::IsNotFound(existing_flash_index_magic_or.status())) { + // Flash index magic doesn't exist. In this case, we're unable to + // determine the version change state correctly (regardless of the + // existence of the version file), so invalidate VersionInfo by setting + // version to -1, but still keep the max_version value read in step 1. + existing_version_info.version = -1; + return existing_version_info; + } + // Real error. 
+ return std::move(existing_flash_index_magic_or).status(); + } + if (existing_flash_index_magic_or.ValueOrDie() == + kVersionZeroFlashIndexMagic) { + existing_version_info.version = 0; + if (existing_version_info.max_version == -1) { + existing_version_info.max_version = 0; + } + } + + return existing_version_info; +} + +libtextclassifier3::Status WriteVersion(const Filesystem& filesystem, + const std::string& version_file_path, + const VersionInfo& version_info) { + if (!filesystem.PWrite(version_file_path.c_str(), /*offset=*/0, &version_info, + sizeof(VersionInfo))) { + return absl_ports::InternalError("Fail to write version"); + } + return libtextclassifier3::Status::OK; +} + +StateChange GetVersionStateChange(const VersionInfo& existing_version_info, + int32_t curr_version) { + if (!existing_version_info.IsValid()) { + return StateChange::kUndetermined; + } + + if (existing_version_info.version == 0) { + return (existing_version_info.max_version == existing_version_info.version) + ? StateChange::kVersionZeroUpgrade + : StateChange::kVersionZeroRollForward; + } + + if (existing_version_info.version == curr_version) { + return StateChange::kCompatible; + } else if (existing_version_info.version > curr_version) { + return StateChange::kRollBack; + } else { // existing_version_info.version < curr_version + return (existing_version_info.max_version == existing_version_info.version) + ? StateChange::kUpgrade + : StateChange::kRollForward; + } +} + +} // namespace version_util + +} // namespace lib +} // namespace icing diff --git a/icing/file/version-util.h b/icing/file/version-util.h new file mode 100644 index 0000000..7fa7fbd --- /dev/null +++ b/icing/file/version-util.h @@ -0,0 +1,97 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_FILE_VERSION_UTIL_H_ +#define ICING_FILE_VERSION_UTIL_H_ + +#include <cstdint> +#include <string> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/file/filesystem.h" + +namespace icing { +namespace lib { + +namespace version_util { + +// - Version 0: Android T. Can be identified only by flash index magic. +// - Version 1: mainline release 2023-06. +inline static constexpr int32_t kVersion = 1; +inline static constexpr int32_t kVersionOne = 1; + +inline static constexpr int kVersionZeroFlashIndexMagic = 0x6dfba6ae; + +struct VersionInfo { + int32_t version; + int32_t max_version; + + explicit VersionInfo(int32_t version_in, int32_t max_version_in) + : version(version_in), max_version(max_version_in) {} + + bool IsValid() const { return version >= 0 && max_version >= 0; } + + bool operator==(const VersionInfo& other) const { + return version == other.version && max_version == other.max_version; + } +} __attribute__((packed)); +static_assert(sizeof(VersionInfo) == 8, ""); + +enum class StateChange { + kUndetermined, + kCompatible, + kRollForward, + kRollBack, + kUpgrade, + kVersionZeroUpgrade, + kVersionZeroRollForward, +}; + +// Helper method to read version info (using version file and flash index header +// magic) from the existing data. If the state is invalid (e.g. flash index +// header file is missing), then return an invalid VersionInfo. 
+// +// RETURNS: +// - Existing data's VersionInfo on success +// - INTERNAL_ERROR on I/O errors +libtextclassifier3::StatusOr<VersionInfo> ReadVersion( + const Filesystem& filesystem, const std::string& version_file_path, + const std::string& index_base_dir); + +// Helper method to write version file. +// +// RETURNS: +// - OK on success +// - INTERNAL_ERROR on I/O errors +libtextclassifier3::Status WriteVersion(const Filesystem& filesystem, + const std::string& version_file_path, + const VersionInfo& version_info); + +// Helper method to determine the change state between the existing data version +// and the current code version. +// +// REQUIRES: curr_version > 0. We implement version checking in version 1, so +// the callers (except unit tests) will always use a version # greater than 0. +// +// RETURNS: StateChange +StateChange GetVersionStateChange(const VersionInfo& existing_version_info, + int32_t curr_version = kVersion); + +} // namespace version_util + +} // namespace lib +} // namespace icing + +#endif // ICING_FILE_VERSION_UTIL_H_ diff --git a/icing/file/version-util_test.cc b/icing/file/version-util_test.cc new file mode 100644 index 0000000..78cdb7d --- /dev/null +++ b/icing/file/version-util_test.cc @@ -0,0 +1,386 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/file/version-util.h" + +#include <optional> +#include <string> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/file/filesystem.h" +#include "icing/file/posting_list/flash-index-storage-header.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { +namespace version_util { + +namespace { + +using ::testing::Eq; + +struct VersionUtilReadVersionTestParam { + std::optional<VersionInfo> existing_version_info; + std::optional<int> existing_flash_index_magic; + VersionInfo expected_version_info; + + explicit VersionUtilReadVersionTestParam( + std::optional<VersionInfo> existing_version_info_in, + std::optional<int> existing_flash_index_magic_in, + VersionInfo expected_version_info_in) + : existing_version_info(std::move(existing_version_info_in)), + existing_flash_index_magic(std::move(existing_flash_index_magic_in)), + expected_version_info(std::move(expected_version_info_in)) {} +}; + +class VersionUtilReadVersionTest + : public ::testing::TestWithParam<VersionUtilReadVersionTestParam> { + protected: + void SetUp() override { + base_dir_ = GetTestTempDir() + "/version_util_test"; + version_file_path_ = base_dir_ + "/version"; + index_path_ = base_dir_ + "/index"; + + ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(base_dir_.c_str())); + } + + void TearDown() override { + ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(base_dir_.c_str())); + } + + const Filesystem& filesystem() const { return filesystem_; } + + Filesystem filesystem_; + std::string base_dir_; + std::string version_file_path_; + std::string index_path_; +}; + +TEST_P(VersionUtilReadVersionTest, ReadVersion) { + const VersionUtilReadVersionTestParam& param = GetParam(); + + // Prepare version file and flash index file. 
+ if (param.existing_version_info.has_value()) { + ICING_ASSERT_OK(WriteVersion(filesystem_, version_file_path_, + param.existing_version_info.value())); + } + + if (param.existing_flash_index_magic.has_value()) { + HeaderBlock header_block(&filesystem_, /*block_size=*/4096); + header_block.header()->magic = param.existing_flash_index_magic.value(); + + std::string main_index_dir = index_path_ + "/idx/main"; + ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(main_index_dir.c_str())); + std::string flash_index_file_path = main_index_dir + "/main_index"; + + ScopedFd sfd(filesystem_.OpenForWrite(flash_index_file_path.c_str())); + ASSERT_TRUE(sfd.is_valid()); + ASSERT_TRUE(header_block.Write(sfd.get())); + } + + ICING_ASSERT_OK_AND_ASSIGN( + VersionInfo version_info, + ReadVersion(filesystem_, version_file_path_, index_path_)); + EXPECT_THAT(version_info, Eq(param.expected_version_info)); +} + +INSTANTIATE_TEST_SUITE_P( + VersionUtilReadVersionTest, VersionUtilReadVersionTest, + testing::Values( + // - Version file doesn't exist + // - Flash index doesn't exist + // - Result: version -1, max_version -1 (invalid) + VersionUtilReadVersionTestParam( + /*existing_version_info_in=*/std::nullopt, + /*existing_flash_index_magic_in=*/std::nullopt, + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/-1, /*max_version=*/-1)), + + // - Version file doesn't exist + // - Flash index exists with version 0 magic + // - Result: version 0, max_version 0 + VersionUtilReadVersionTestParam( + /*existing_version_info_in=*/std::nullopt, + /*existing_flash_index_magic_in=*/ + std::make_optional<int>(kVersionZeroFlashIndexMagic), + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/0, /*max_version=*/0)), + + // - Version file doesn't exist + // - Flash index exists with non version 0 magic + // - Result: version -1, max_version -1 (invalid) + VersionUtilReadVersionTestParam( + /*existing_version_info_in=*/std::nullopt, + /*existing_flash_index_magic_in=*/ + 
std::make_optional<int>(kVersionZeroFlashIndexMagic + 1), + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/-1, /*max_version=*/-1)), + + // - Version file exists + // - Flash index doesn't exist + // - Result: version -1, max_version 1 (invalid) + VersionUtilReadVersionTestParam( + /*existing_version_info_in=*/std::make_optional<VersionInfo>( + /*version_in=*/1, /*max_version=*/1), + /*existing_flash_index_magic_in=*/std::nullopt, + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/-1, /*max_version=*/1)), + + // - Version file exists: version 1, max_version 1 + // - Flash index exists with version 0 magic + // - Result: version 0, max_version 1 + VersionUtilReadVersionTestParam( + /*existing_version_info_in=*/std::make_optional<VersionInfo>( + /*version_in=*/1, /*max_version=*/1), + /*existing_flash_index_magic_in=*/ + std::make_optional<int>(kVersionZeroFlashIndexMagic), + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/0, /*max_version=*/1)), + + // - Version file exists: version 2, max_version 3 + // - Flash index exists with version 0 magic + // - Result: version 0, max_version 3 + VersionUtilReadVersionTestParam( + /*existing_version_info_in=*/std::make_optional<VersionInfo>( + /*version_in=*/2, /*max_version=*/3), + /*existing_flash_index_magic_in=*/ + std::make_optional<int>(kVersionZeroFlashIndexMagic), + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/0, /*max_version=*/3)), + + // - Version file exists: version 1, max_version 1 + // - Flash index exists with non version 0 magic + // - Result: version 1, max_version 1 + VersionUtilReadVersionTestParam( + /*existing_version_info_in=*/std::make_optional<VersionInfo>( + /*version_in=*/1, /*max_version=*/1), + /*existing_flash_index_magic_in=*/ + std::make_optional<int>(kVersionZeroFlashIndexMagic + 1), + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/1, /*max_version=*/1)), + + // - Version file exists: version 2, max_version 3 + // - Flash index exists 
with non version 0 magic + // - Result: version 2, max_version 3 + VersionUtilReadVersionTestParam( + /*existing_version_info_in=*/std::make_optional<VersionInfo>( + /*version_in=*/2, /*max_version=*/3), + /*existing_flash_index_magic_in=*/ + std::make_optional<int>(kVersionZeroFlashIndexMagic + 1), + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/2, /*max_version=*/3)))); + +struct VersionUtilStateChangeTestParam { + VersionInfo existing_version_info; + int32_t curr_version; + StateChange expected_state_change; + + explicit VersionUtilStateChangeTestParam(VersionInfo existing_version_info_in, + int32_t curr_version_in, + StateChange expected_state_change_in) + : existing_version_info(std::move(existing_version_info_in)), + curr_version(curr_version_in), + expected_state_change(expected_state_change_in) {} +}; + +class VersionUtilStateChangeTest + : public ::testing::TestWithParam<VersionUtilStateChangeTestParam> {}; + +TEST_P(VersionUtilStateChangeTest, GetVersionStateChange) { + const VersionUtilStateChangeTestParam& param = GetParam(); + + EXPECT_THAT( + GetVersionStateChange(param.existing_version_info, param.curr_version), + Eq(param.expected_state_change)); +} + +INSTANTIATE_TEST_SUITE_P( + VersionUtilStateChangeTest, VersionUtilStateChangeTest, + testing::Values( + // - version -1, max_version -1 (invalid) + // - Current version = 1 + // - Result: undetermined + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(-1, -1), + /*curr_version_in=*/1, + /*expected_state_change_in=*/StateChange::kUndetermined), + + // - version -1, max_version 1 (invalid) + // - Current version = 1 + // - Result: undetermined + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(-1, 1), + /*curr_version_in=*/1, + /*expected_state_change_in=*/StateChange::kUndetermined), + + // - version -1, max_version -1 (invalid) + // - Current version = 2 + // - Result: undetermined + VersionUtilStateChangeTestParam( + 
/*existing_version_info_in=*/VersionInfo(-1, -1), + /*curr_version_in=*/2, + /*expected_state_change_in=*/StateChange::kUndetermined), + + // - version -1, max_version 1 (invalid) + // - Current version = 2 + // - Result: undetermined + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(-1, 1), + /*curr_version_in=*/2, + /*expected_state_change_in=*/StateChange::kUndetermined), + + // - version 0, max_version 0 + // - Current version = 1 + // - Result: version 0 upgrade + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(0, 0), + /*curr_version_in=*/1, + /*expected_state_change_in=*/StateChange::kVersionZeroUpgrade), + + // - version 0, max_version 1 + // - Current version = 1 + // - Result: version 0 roll forward + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(0, 1), + /*curr_version_in=*/1, + /*expected_state_change_in=*/StateChange::kVersionZeroRollForward), + + // - version 0, max_version 2 + // - Current version = 1 + // - Result: version 0 roll forward + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(0, 2), + /*curr_version_in=*/1, + /*expected_state_change_in=*/StateChange::kVersionZeroRollForward), + + // - version 0, max_version 0 + // - Current version = 2 + // - Result: version 0 upgrade + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(0, 0), + /*curr_version_in=*/2, + /*expected_state_change_in=*/StateChange::kVersionZeroUpgrade), + + // - version 0, max_version 1 + // - Current version = 2 + // - Result: version 0 roll forward + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(0, 1), + /*curr_version_in=*/2, + /*expected_state_change_in=*/StateChange::kVersionZeroRollForward), + + // - version 0, max_version 2 + // - Current version = 2 + // - Result: version 0 roll forward + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(0, 2), + /*curr_version_in=*/2, + 
/*expected_state_change_in=*/StateChange::kVersionZeroRollForward), + + // - version 1, max_version 1 + // - Current version = 1 + // - Result: compatible + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(1, 1), + /*curr_version_in=*/1, + /*expected_state_change_in=*/StateChange::kCompatible), + + // - version 1, max_version 2 + // - Current version = 1 + // - Result: compatible + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(1, 2), + /*curr_version_in=*/1, + /*expected_state_change_in=*/StateChange::kCompatible), + + // - version 2, max_version 2 + // - Current version = 1 + // - Result: roll back + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(2, 2), + /*curr_version_in=*/1, + /*expected_state_change_in=*/StateChange::kRollBack), + + // - version 2, max_version 3 + // - Current version = 1 + // - Result: roll back + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(2, 3), + /*curr_version_in=*/1, + /*expected_state_change_in=*/StateChange::kRollBack), + + // - version 1, max_version 1 + // - Current version = 2 + // - Result: upgrade + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(1, 1), + /*curr_version_in=*/2, + /*expected_state_change_in=*/StateChange::kUpgrade), + + // - version 1, max_version 2 + // - Current version = 2 + // - Result: roll forward + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(1, 2), + /*curr_version_in=*/2, + /*expected_state_change_in=*/StateChange::kRollForward), + + // - version 1, max_version 3 + // - Current version = 2 + // - Result: roll forward + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(1, 3), + /*curr_version_in=*/2, + /*expected_state_change_in=*/StateChange::kRollForward), + + // - version 2, max_version 2 + // - Current version = 2 + // - Result: compatible + VersionUtilStateChangeTestParam( + 
/*existing_version_info_in=*/VersionInfo(2, 2), + /*curr_version_in=*/2, + /*expected_state_change_in=*/StateChange::kCompatible), + + // - version 2, max_version 3 + // - Current version = 2 + // - Result: compatible + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(2, 3), + /*curr_version_in=*/2, + /*expected_state_change_in=*/StateChange::kCompatible), + + // - version 3, max_version 3 + // - Current version = 2 + // - Result: rollback + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(3, 3), + /*curr_version_in=*/2, + /*expected_state_change_in=*/StateChange::kRollBack), + + // - version 3, max_version 4 + // - Current version = 2 + // - Result: rollback + VersionUtilStateChangeTestParam( + /*existing_version_info_in=*/VersionInfo(3, 4), + /*curr_version_in=*/2, + /*expected_state_change_in=*/StateChange::kRollBack))); + +} // namespace + +} // namespace version_util +} // namespace lib +} // namespace icing diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index 56c7795..e7b6ae9 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -32,6 +32,7 @@ #include "icing/file/destructible-file.h" #include "icing/file/file-backed-proto.h" #include "icing/file/filesystem.h" +#include "icing/file/version-util.h" #include "icing/index/data-indexing-handler.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/index-processor.h" @@ -41,7 +42,7 @@ #include "icing/index/numeric/integer-index.h" #include "icing/index/string-section-indexing-handler.h" #include "icing/join/join-processor.h" -#include "icing/join/qualified-id-joinable-property-indexing-handler.h" +#include "icing/join/qualified-id-join-indexing-handler.h" #include "icing/join/qualified-id-type-joinable-index.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/portable/endian.h" @@ -96,6 +97,7 @@ namespace lib { namespace { +constexpr std::string_view kVersionFilename = "version"; 
constexpr std::string_view kDocumentSubfolderName = "document_dir"; constexpr std::string_view kIndexSubfolderName = "index_dir"; constexpr std::string_view kIntegerIndexSubfolderName = "integer_index_dir"; @@ -216,6 +218,12 @@ libtextclassifier3::Status ValidateSuggestionSpec( return libtextclassifier3::Status::OK; } +// Version file is a single file under base_dir containing version info of the +// existing data. +std::string MakeVersionFilePath(const std::string& base_dir) { + return absl_ports::StrCat(base_dir, "/", kVersionFilename); +} + // Document store files are in a standalone subfolder for easier file // management. We can delete and recreate the subfolder and not touch/affect // anything else. @@ -454,20 +462,34 @@ libtextclassifier3::Status IcingSearchEngine::CheckInitMarkerFile( // fails, then just assume the value is zero (the most likely reason for // failure would be non-existence because the last init was successful // anyways). - ScopedFd marker_file_fd(filesystem_->OpenForWrite(marker_filepath.c_str())); + std::unique_ptr<ScopedFd> marker_file_fd = std::make_unique<ScopedFd>( + filesystem_->OpenForWrite(marker_filepath.c_str())); libtextclassifier3::Status status; if (file_exists && - filesystem_->PRead(marker_file_fd.get(), &network_init_attempts, + filesystem_->PRead(marker_file_fd->get(), &network_init_attempts, sizeof(network_init_attempts), /*offset=*/0)) { host_init_attempts = GNetworkToHostL(network_init_attempts); if (host_init_attempts > kMaxUnsuccessfulInitAttempts) { // We're tried and failed to init too many times. We need to throw // everything out and start from scratch. ResetMembers(); + marker_file_fd.reset(); + + // Delete the entire base directory. if (!filesystem_->DeleteDirectoryRecursively( options_.base_dir().c_str())) { return absl_ports::InternalError("Failed to delete icing base dir!"); } + + // Create the base directory again and reopen marker file. 
+ if (!filesystem_->CreateDirectoryRecursively( + options_.base_dir().c_str())) { + return absl_ports::InternalError("Failed to create icing base dir!"); + } + + marker_file_fd = std::make_unique<ScopedFd>( + filesystem_->OpenForWrite(marker_filepath.c_str())); + status = absl_ports::DataLossError( "Encountered failed initialization limit. Cleared all data."); host_init_attempts = 0; @@ -482,10 +504,10 @@ libtextclassifier3::Status IcingSearchEngine::CheckInitMarkerFile( ++host_init_attempts; network_init_attempts = GHostToNetworkL(host_init_attempts); // Write the updated number of attempts before we get started. - if (!filesystem_->PWrite(marker_file_fd.get(), /*offset=*/0, + if (!filesystem_->PWrite(marker_file_fd->get(), /*offset=*/0, &network_init_attempts, sizeof(network_init_attempts)) || - !filesystem_->DataSync(marker_file_fd.get())) { + !filesystem_->DataSync(marker_file_fd->get())) { return absl_ports::InternalError( "Failed to write and sync init marker file"); } @@ -547,6 +569,31 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( return status; } + // Read version file and determine the state change. + const std::string version_filepath = MakeVersionFilePath(options_.base_dir()); + const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir()); + ICING_ASSIGN_OR_RETURN( + version_util::VersionInfo version_info, + version_util::ReadVersion(*filesystem_, version_filepath, index_dir)); + version_util::StateChange version_state_change = + version_util::GetVersionStateChange(version_info); + if (version_state_change != version_util::StateChange::kCompatible) { + // Step 1: migrate schema according to the version state change. 
+ ICING_RETURN_IF_ERROR(SchemaStore::MigrateSchema( + filesystem_.get(), MakeSchemaDirectoryPath(options_.base_dir()), + version_state_change, version_util::kVersion)); + + // Step 2: discard all derived data + ICING_RETURN_IF_ERROR(DiscardDerivedFiles()); + + // Step 3: update version file + version_util::VersionInfo new_version_info( + version_util::kVersion, + std::max(version_info.max_version, version_util::kVersion)); + ICING_RETURN_IF_ERROR(version_util::WriteVersion( + *filesystem_, version_filepath, new_version_info)); + } + ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats)); // TODO(b/156383798) : Resolve how to specify the locale. @@ -567,7 +614,6 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( // and index directories and initialize them from scratch. const std::string doc_store_dir = MakeDocumentDirectoryPath(options_.base_dir()); - const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir()); const std::string integer_index_dir = MakeIntegerIndexWorkingPath(options_.base_dir()); const std::string qualified_id_join_index_dir = @@ -597,7 +643,6 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( // We're going to need to build the index from scratch. So just delete its // directory now. // Discard index directory and instantiate a new one. 
- const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir()); Index::Options index_options(index_dir, options_.index_merge_size()); if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) || !filesystem_->CreateDirectoryRecursively(index_dir.c_str())) { @@ -649,6 +694,24 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC); initialize_stats->set_qualified_id_join_index_restoration_cause( InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC); + } else if (version_state_change != version_util::StateChange::kCompatible) { + ICING_RETURN_IF_ERROR(InitializeDocumentStore( + /*force_recovery_and_revalidate_documents=*/true, initialize_stats)); + index_init_status = InitializeIndex(initialize_stats); + if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) { + return index_init_status; + } + + initialize_stats->set_schema_store_recovery_cause( + InitializeStatsProto::VERSION_CHANGED); + initialize_stats->set_document_store_recovery_cause( + InitializeStatsProto::VERSION_CHANGED); + initialize_stats->set_index_restoration_cause( + InitializeStatsProto::VERSION_CHANGED); + initialize_stats->set_integer_index_restoration_cause( + InitializeStatsProto::VERSION_CHANGED); + initialize_stats->set_qualified_id_join_index_restoration_cause( + InitializeStatsProto::VERSION_CHANGED); } else { ICING_RETURN_IF_ERROR(InitializeDocumentStore( /*force_recovery_and_revalidate_documents=*/false, initialize_stats)); @@ -861,7 +924,8 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( DestructibleFile marker_file(marker_filepath, filesystem_.get()); auto set_schema_result_or = schema_store_->SetSchema( - std::move(new_schema), ignore_errors_and_delete_documents); + std::move(new_schema), ignore_errors_and_delete_documents, + options_.allow_circular_schema_definitions()); if (!set_schema_result_or.ok()) { TransformStatus(set_schema_result_or.status(), result_status); return result_proto; 
@@ -1123,12 +1187,13 @@ GetResultProto IcingSearchEngine::Get(const std::string_view name_space, DocumentProto document = std::move(document_or).ValueOrDie(); std::unique_ptr<ProjectionTree> type_projection_tree; std::unique_ptr<ProjectionTree> wildcard_projection_tree; - for (const TypePropertyMask& type_field_mask : - result_spec.type_property_masks()) { - if (type_field_mask.schema_type() == document.schema()) { + for (const SchemaStore::ExpandedTypePropertyMask& type_field_mask : + schema_store_->ExpandTypePropertyMasks( + result_spec.type_property_masks())) { + if (type_field_mask.schema_type == document.schema()) { type_projection_tree = std::make_unique<ProjectionTree>(type_field_mask); - } else if (type_field_mask.schema_type() == - ProjectionTree::kSchemaTypeWildcard) { + } else if (type_field_mask.schema_type == + SchemaStore::kSchemaTypeWildcard) { wildcard_projection_tree = std::make_unique<ProjectionTree>(type_field_mask); } @@ -1817,7 +1882,7 @@ SearchResultProto IcingSearchEngine::Search( child_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>( join_spec.nested_spec().search_spec(), join_spec.nested_spec().scoring_spec(), - join_spec.nested_spec().result_spec(), + join_spec.nested_spec().result_spec(), schema_store_.get(), std::move(nested_query_scoring_results.query_terms)); } @@ -1847,7 +1912,7 @@ SearchResultProto IcingSearchEngine::Search( // Construct parent's result adjustment info. 
auto parent_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>( - search_spec, scoring_spec, result_spec, + search_spec, scoring_spec, result_spec, schema_store_.get(), std::move(query_scoring_results.query_terms)); std::unique_ptr<ScoredDocumentHitsRanker> ranker; @@ -2352,11 +2417,10 @@ IcingSearchEngine::CreateDataIndexingHandlers() { handlers.push_back(std::move(integer_section_indexing_handler)); // Qualified id joinable property index handler - ICING_ASSIGN_OR_RETURN( - std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> - qualified_id_joinable_property_indexing_handler, - QualifiedIdJoinablePropertyIndexingHandler::Create( - clock_.get(), qualified_id_join_index_.get())); + ICING_ASSIGN_OR_RETURN(std::unique_ptr<QualifiedIdJoinIndexingHandler> + qualified_id_joinable_property_indexing_handler, + QualifiedIdJoinIndexingHandler::Create( + clock_.get(), qualified_id_join_index_.get())); handlers.push_back( std::move(qualified_id_joinable_property_indexing_handler)); @@ -2454,6 +2518,44 @@ IcingSearchEngine::TruncateIndicesTo(DocumentId last_stored_document_id) { qualified_id_join_index_needed_restoration); } +libtextclassifier3::Status IcingSearchEngine::DiscardDerivedFiles() { + if (schema_store_ != nullptr || document_store_ != nullptr || + index_ != nullptr || integer_index_ != nullptr || + qualified_id_join_index_ != nullptr) { + return absl_ports::FailedPreconditionError( + "Cannot discard derived files while having valid instances"); + } + + // Schema store + ICING_RETURN_IF_ERROR( + SchemaStore::DiscardDerivedFiles(filesystem_.get(), options_.base_dir())); + + // Document store + ICING_RETURN_IF_ERROR(DocumentStore::DiscardDerivedFiles( + filesystem_.get(), options_.base_dir())); + + // Term index + if (!filesystem_->DeleteDirectoryRecursively( + MakeIndexDirectoryPath(options_.base_dir()).c_str())) { + return absl_ports::InternalError("Failed to discard index"); + } + + // Integer index + if 
(!filesystem_->DeleteDirectoryRecursively( + MakeIntegerIndexWorkingPath(options_.base_dir()).c_str())) { + return absl_ports::InternalError("Failed to discard integer index"); + } + + // Qualified id join index + if (!filesystem_->DeleteDirectoryRecursively( + MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir()).c_str())) { + return absl_ports::InternalError( + "Failed to discard qualified id join index"); + } + + return libtextclassifier3::Status::OK; +} + libtextclassifier3::Status IcingSearchEngine::ClearSearchIndices() { ICING_RETURN_IF_ERROR(index_->Reset()); ICING_RETURN_IF_ERROR(integer_index_->Clear()); diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h index 3e85f69..4192169 100644 --- a/icing/icing-search-engine.h +++ b/icing/icing-search-engine.h @@ -614,6 +614,15 @@ class IcingSearchEngine { libtextclassifier3::Status CheckConsistency() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + // Discards all derived data. + // + // Returns: + // OK on success + // FAILED_PRECONDITION_ERROR if those instances are valid (non nullptr) + // INTERNAL_ERROR on any I/O errors + libtextclassifier3::Status DiscardDerivedFiles() + ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + // Repopulates derived data off our ground truths. // // Returns: diff --git a/icing/icing-search-engine_backwards_compatibility_test.cc b/icing/icing-search-engine_backwards_compatibility_test.cc index 848c347..178e923 100644 --- a/icing/icing-search-engine_backwards_compatibility_test.cc +++ b/icing/icing-search-engine_backwards_compatibility_test.cc @@ -118,16 +118,17 @@ TEST_F(IcingSearchEngineBackwardsCompatibilityTest, IcingSearchEngine icing(icing_options, GetTestJniCache()); InitializeResultProto init_result = icing.Initialize(); EXPECT_THAT(init_result.status(), ProtoIsOk()); + + // Since there will be version change, the recovery cause will be + // VERSION_CHANGED. 
EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT)); + Eq(InitializeStatsProto::VERSION_CHANGED)); EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - // The main and lite indexes are in legacy formats and therefore will need to - // be rebuilt from scratch. + Eq(InitializeStatsProto::VERSION_CHANGED)); EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::IO_ERROR)); + Eq(InitializeStatsProto::VERSION_CHANGED)); // Set up schema, this is the one used to validate documents in the testdata // files. Do not change unless you're also updating the testdata files. @@ -257,17 +258,17 @@ TEST_F(IcingSearchEngineBackwardsCompatibilityTest, MigrateToLargerScale) { IcingSearchEngine icing(icing_options, GetTestJniCache()); InitializeResultProto init_result = icing.Initialize(); EXPECT_THAT(init_result.status(), ProtoIsOk()); + + // Since there will be version change, the recovery cause will be + // VERSION_CHANGED. EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); - // No recovery is required for the document store. EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); + Eq(InitializeStatsProto::VERSION_CHANGED)); EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - // The main and lite indexes are in legacy formats and therefore will need to - // be rebuilt from scratch. 
+ Eq(InitializeStatsProto::VERSION_CHANGED)); EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::IO_ERROR)); + Eq(InitializeStatsProto::VERSION_CHANGED)); // Verify that the schema stored in the index matches the one that we expect. // Do not change unless you're also updating the testdata files. @@ -404,18 +405,19 @@ TEST_F(IcingSearchEngineBackwardsCompatibilityTest, IcingSearchEngine icing(icing_options, GetTestJniCache()); InitializeResultProto init_result = icing.Initialize(); EXPECT_THAT(init_result.status(), ProtoIsOk()); + + // Since there will be version change, the recovery cause will be + // VERSION_CHANGED. EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); - // No recovery is required for the document store. EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); + Eq(InitializeStatsProto::VERSION_CHANGED)); // TODO: create enum code for legacy schema store recovery after schema store // change is made. EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); - // No recovery is required for the index. + Eq(InitializeStatsProto::VERSION_CHANGED)); EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::NONE)); + Eq(InitializeStatsProto::VERSION_CHANGED)); // Verify that the schema stored in the index matches the one that we expect. // Do not change unless you're also updating the testdata files. 
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc index cf654a8..fb44595 100644 --- a/icing/icing-search-engine_benchmark.cc +++ b/icing/icing-search-engine_benchmark.cc @@ -1164,7 +1164,6 @@ void BM_JoinQueryQualifiedId(benchmark::State& state) { // JoinSpec JoinSpecProto* join_spec = search_spec.mutable_join_spec(); - join_spec->set_max_joined_child_count(std::numeric_limits<int32_t>::max()); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("personQualifiedId"); @@ -1181,6 +1180,8 @@ void BM_JoinQueryQualifiedId(benchmark::State& state) { static constexpr int kNumPerPage = 10; ResultSpecProto result_spec; result_spec.set_num_per_page(kNumPerPage); + result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); ScoringSpecProto score_spec = ScoringSpecProto::default_instance(); diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc index 0db4d54..13a2dc3 100644 --- a/icing/icing-search-engine_initialization_test.cc +++ b/icing/icing-search-engine_initialization_test.cc @@ -24,13 +24,19 @@ #include "icing/document-builder.h" #include "icing/file/filesystem.h" #include "icing/file/mock-filesystem.h" +#include "icing/file/version-util.h" #include "icing/icing-search-engine.h" +#include "icing/index/index-processor.h" #include "icing/index/index.h" +#include "icing/index/integer-section-indexing-handler.h" #include "icing/index/numeric/integer-index.h" +#include "icing/index/string-section-indexing-handler.h" #include "icing/jni/jni-cache.h" #include "icing/join/doc-join-info.h" #include "icing/join/join-processor.h" +#include "icing/join/qualified-id-join-indexing-handler.h" #include "icing/join/qualified-id-type-joinable-index.h" +#include "icing/legacy/index/icing-filesystem.h" #include "icing/legacy/index/icing-mock-filesystem.h" #include 
"icing/portable/endian.h" #include "icing/portable/equals-proto.h" @@ -61,6 +67,12 @@ #include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" +#include "icing/tokenization/language-segmenter-factory.h" +#include "icing/tokenization/language-segmenter.h" +#include "icing/transform/normalizer-factory.h" +#include "icing/transform/normalizer.h" +#include "icing/util/tokenized-document.h" +#include "unicode/uloc.h" namespace icing { namespace lib { @@ -144,21 +156,39 @@ class IcingSearchEngineInitializationTest : public testing::Test { icu_data_file_helper::SetUpICUDataFile(icu_data_file_path)); } filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str()); + + language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US); + ICING_ASSERT_OK_AND_ASSIGN( + lang_segmenter_, + language_segmenter_factory::Create(std::move(segmenter_options))); + + ICING_ASSERT_OK_AND_ASSIGN( + normalizer_, + normalizer_factory::Create( + /*max_term_byte_size=*/std::numeric_limits<int32_t>::max())); } void TearDown() override { + normalizer_.reset(); + lang_segmenter_.reset(); filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str()); } const Filesystem* filesystem() const { return &filesystem_; } - private: + const IcingFilesystem* icing_filesystem() const { return &icing_filesystem_; } + Filesystem filesystem_; + IcingFilesystem icing_filesystem_; + std::unique_ptr<LanguageSegmenter> lang_segmenter_; + std::unique_ptr<Normalizer> normalizer_; }; // Non-zero value so we don't override it to be the current time constexpr int64_t kDefaultCreationTimestampMs = 1575492852000; +std::string GetVersionFilename() { return GetTestBaseDir() + "/version"; } + std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; } std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; } @@ -869,7 +899,9 @@ TEST_F(IcingSearchEngineInitializationTest, ICING_ASSERT_OK_AND_ASSIGN( 
std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); - ICING_EXPECT_OK(schema_store->SetSchema(new_schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + new_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); } // Will persist new schema IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); @@ -1024,7 +1056,6 @@ TEST_F(IcingSearchEngineInitializationTest, ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); - ICING_EXPECT_OK(schema_store->SetSchema(CreateMessageSchema())); // Puts message2 into DocumentStore but doesn't index it. ICING_ASSERT_OK_AND_ASSIGN( @@ -1137,7 +1168,6 @@ TEST_F(IcingSearchEngineInitializationTest, search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -1151,6 +1181,10 @@ TEST_F(IcingSearchEngineInitializationTest, *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); + ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); + result_spec3.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + SearchResultProto expected_join_search_result_proto; expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK); SearchResultProto::ResultProto* result_proto = @@ -1159,9 +1193,8 @@ TEST_F(IcingSearchEngineInitializationTest, *result_proto->mutable_joined_results()->Add()->mutable_document() = message2; *result_proto->mutable_joined_results()->Add()->mutable_document() = message1; - SearchResultProto 
search_result_proto3 = - icing.Search(search_spec3, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); + SearchResultProto search_result_proto3 = icing.Search( + search_spec3, ScoringSpecProto::default_instance(), result_spec3); EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores( expected_join_search_result_proto)); } @@ -1527,7 +1560,6 @@ TEST_F(IcingSearchEngineInitializationTest, search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec.set_query("name:person"); JoinSpecProto* join_spec = search_spec.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -1541,6 +1573,10 @@ TEST_F(IcingSearchEngineInitializationTest, *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); + ResultSpecProto result_spec = ResultSpecProto::default_instance(); + result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); SearchResultProto::ResultProto* result_proto = @@ -1559,8 +1595,7 @@ TEST_F(IcingSearchEngineInitializationTest, EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to @@ -1619,24 +1654,26 @@ TEST_F(IcingSearchEngineInitializationTest, // Check that our index is ok by 
searching over the restored index SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) { - // Test the following scenario: losing the entire term index directory. + // Test the following scenario: losing the entire term index. Since we need + // flash index magic to determine the version, in this test we will throw out + // the entire term index and re-initialize an empty one, to bypass + // undetermined version state change and correctly trigger "lose term index" + // scenario. // IcingSearchEngine should be able to recover term index. Several additional // behaviors are also tested: // - Index directory handling: - // - Term index directory should not be discarded since we've already lost - // it. Start it from scratch. + // - Term index directory should not be discarded (but instead just being + // rebuilt by replaying all docs). // - Integer index directory should be unaffected. // - Qualified id join index directory should be unaffected. // - Truncate indices: - // - "TruncateTo()" for term index shouldn't take effect since we start it - // from scratch. + // - "TruncateTo()" for term index shouldn't take effect since it is empty. // - "Clear()" shouldn't be called for integer index, i.e. no integer index // storage sub directories (path_expr = "*/integer_index_dir/*") should be // discarded. @@ -1704,9 +1741,18 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) { EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); } - // 2. Delete the term index directory to trigger RestoreIndexIfNeeded. - std::string idx_dir = GetIndexDir(); - filesystem()->DeleteDirectoryRecursively(idx_dir.c_str()); + // 2. 
Delete and re-initialize an empty term index to trigger + // RestoreIndexIfNeeded. + { + std::string idx_subdir = GetIndexDir() + "/idx"; + ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str())); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<Index> index, + Index::Create(Index::Options(GetIndexDir(), + /*index_merge_size=*/100), + filesystem(), icing_filesystem())); + ICING_ASSERT_OK(index->PersistToDisk()); + } // 3. Create the index again. This should trigger index restoration. { @@ -1791,7 +1837,6 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) { search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -1805,9 +1850,12 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) { *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); - SearchResultProto results3 = - icing.Search(search_spec3, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); + ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); + result_spec3.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + + SearchResultProto results3 = icing.Search( + search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); @@ -1985,7 +2033,6 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) { search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); 
JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -1999,9 +2046,12 @@ TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) { *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); - SearchResultProto results3 = - icing.Search(search_spec3, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); + ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); + result_spec3.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + + SearchResultProto results3 = icing.Search( + search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); @@ -2181,7 +2231,6 @@ TEST_F(IcingSearchEngineInitializationTest, search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -2195,9 +2244,12 @@ TEST_F(IcingSearchEngineInitializationTest, *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); - SearchResultProto results3 = - icing.Search(search_spec3, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); + ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); + result_spec3.set_max_joined_children_per_parent_to_return( + 
std::numeric_limits<int32_t>::max()); + + SearchResultProto results3 = icing.Search( + search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); @@ -2303,14 +2355,12 @@ TEST_F(IcingSearchEngineInitializationTest, // - Integer index: [0, 1, 2] // - Qualified id join index: [0, 1, 2] { - Filesystem filesystem; - IcingFilesystem icing_filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Index> index, Index::Create( Index::Options(GetIndexDir(), /*index_merge_size=*/message.ByteSizeLong()), - &filesystem, &icing_filesystem)); + filesystem(), icing_filesystem())); DocumentId original_last_added_doc_id = index->last_added_document_id(); index->set_last_added_document_id(original_last_added_doc_id + 1); Index::Editor editor = @@ -2405,7 +2455,6 @@ TEST_F(IcingSearchEngineInitializationTest, search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -2419,9 +2468,12 @@ TEST_F(IcingSearchEngineInitializationTest, *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); - SearchResultProto results3 = - icing.Search(search_spec3, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); + ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); + result_spec3.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + + SearchResultProto results3 = icing.Search( + search_spec3, ScoringSpecProto::default_instance(), result_spec3); 
ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(2)); @@ -2435,14 +2487,12 @@ TEST_F(IcingSearchEngineInitializationTest, // verify the correctness of term index restoration. Instead, we have to check // hits for "foo" should not be found in term index. { - Filesystem filesystem; - IcingFilesystem icing_filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Index> index, Index::Create( Index::Options(GetIndexDir(), /*index_merge_size=*/message.ByteSizeLong()), - &filesystem, &icing_filesystem)); + filesystem(), icing_filesystem())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter, index->GetIterator("foo", /*term_start_index=*/0, @@ -2549,14 +2599,12 @@ TEST_F(IcingSearchEngineInitializationTest, // - Integer index: [0, 1, 2, 3] // - Qualified id join index: [0, 1, 2, 3] { - Filesystem filesystem; - IcingFilesystem icing_filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Index> index, Index::Create( Index::Options(GetIndexDir(), /*index_merge_size=*/message.ByteSizeLong()), - &filesystem, &icing_filesystem)); + filesystem(), icing_filesystem())); DocumentId original_last_added_doc_id = index->last_added_document_id(); index->set_last_added_document_id(original_last_added_doc_id + 1); Index::Editor editor = @@ -2654,7 +2702,6 @@ TEST_F(IcingSearchEngineInitializationTest, search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -2668,9 +2715,12 @@ TEST_F(IcingSearchEngineInitializationTest, *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = 
ResultSpecProto::default_instance(); - SearchResultProto results3 = - icing.Search(search_spec3, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); + ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); + result_spec3.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + + SearchResultProto results3 = icing.Search( + search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); @@ -2686,14 +2736,12 @@ TEST_F(IcingSearchEngineInitializationTest, // verify the correctness of term index restoration. Instead, we have to check // hits for "foo" should not be found in term index. { - Filesystem filesystem; - IcingFilesystem icing_filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Index> index, Index::Create( Index::Options(GetIndexDir(), /*index_merge_size=*/message.ByteSizeLong()), - &filesystem, &icing_filesystem)); + filesystem(), icing_filesystem())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter, index->GetIterator("foo", /*term_start_index=*/0, @@ -2747,15 +2795,13 @@ TEST_F(IcingSearchEngineInitializationTest, // - Integer index: [] // - Qualified id join index: [] { - Filesystem filesystem; - IcingFilesystem icing_filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Index> index, Index::Create( // index merge size is not important here because we will manually // invoke merge below. Index::Options(GetIndexDir(), /*index_merge_size=*/100), - &filesystem, &icing_filesystem)); + filesystem(), icing_filesystem())); // Add hits for document 0 and merge. 
ASSERT_THAT(index->last_added_document_id(), kInvalidDocumentId); index->set_last_added_document_id(0); @@ -2828,12 +2874,10 @@ TEST_F(IcingSearchEngineInitializationTest, // enough to verify the correctness of term index restoration. Instead, we // have to check hits for "foo", "bar" should not be found in term index. { - Filesystem filesystem; - IcingFilesystem icing_filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Index> index, Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100), - &filesystem, &icing_filesystem)); + filesystem(), icing_filesystem())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter, index->GetIterator("foo", /*term_start_index=*/0, @@ -2944,14 +2988,12 @@ TEST_F(IcingSearchEngineInitializationTest, // - Integer index: [0, 1, 2, 3] // - Qualified id join index: [0, 1, 2, 3] { - Filesystem filesystem; - IcingFilesystem icing_filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Index> index, Index::Create( Index::Options(GetIndexDir(), /*index_merge_size=*/message.ByteSizeLong()), - &filesystem, &icing_filesystem)); + filesystem(), icing_filesystem())); // Add hits for document 4 and merge. 
DocumentId original_last_added_doc_id = index->last_added_document_id(); index->set_last_added_document_id(original_last_added_doc_id + 1); @@ -3057,7 +3099,6 @@ TEST_F(IcingSearchEngineInitializationTest, search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -3071,9 +3112,12 @@ TEST_F(IcingSearchEngineInitializationTest, *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); - SearchResultProto results3 = - icing.Search(search_spec3, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); + ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); + result_spec3.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + + SearchResultProto results3 = icing.Search( + search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); @@ -3089,12 +3133,10 @@ TEST_F(IcingSearchEngineInitializationTest, // enough to verify the correctness of term index restoration. Instead, we // have to check hits for "foo", "bar" should not be found in term index. 
{ - Filesystem filesystem; - IcingFilesystem icing_filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<Index> index, Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100), - &filesystem, &icing_filesystem)); + filesystem(), icing_filesystem())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter, index->GetIterator("foo", /*term_start_index=*/0, @@ -3426,7 +3468,6 @@ TEST_F(IcingSearchEngineInitializationTest, search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -3440,9 +3481,12 @@ TEST_F(IcingSearchEngineInitializationTest, *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); - SearchResultProto results3 = - icing.Search(search_spec3, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); + ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); + result_spec3.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + + SearchResultProto results3 = icing.Search( + search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); @@ -3808,7 +3852,6 @@ TEST_F(IcingSearchEngineInitializationTest, search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); 
join_spec->set_child_property_expression("senderQualifiedId"); @@ -3822,9 +3865,12 @@ TEST_F(IcingSearchEngineInitializationTest, *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); - SearchResultProto results3 = - icing.Search(search_spec3, ScoringSpecProto::default_instance(), - ResultSpecProto::default_instance()); + ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); + result_spec3.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + + SearchResultProto results3 = icing.Search( + search_spec3, ScoringSpecProto::default_instance(), result_spec3); ASSERT_THAT(results3.results(), SizeIs(1)); EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); @@ -4249,9 +4295,16 @@ TEST_F(IcingSearchEngineInitializationTest, } { - // Delete the index file to trigger RestoreIndexIfNeeded. + // Delete and re-initialize an empty index file to trigger + // RestoreIndexIfNeeded. 
std::string idx_subdir = GetIndexDir() + "/idx"; - filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()); + ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str())); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<Index> index, + Index::Create(Index::Options(GetIndexDir(), + /*index_merge_size=*/100), + filesystem(), icing_filesystem())); + ICING_ASSERT_OK(index->PersistToDisk()); } { @@ -4501,7 +4554,9 @@ TEST_F(IcingSearchEngineInitializationTest, ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); - ICING_EXPECT_OK(schema_store->SetSchema(new_schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + new_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); } { @@ -4890,10 +4945,11 @@ TEST_F(IcingSearchEngineInitializationTest, } { - // Delete the schema store header file to trigger an I/O error. + // Delete the schema store type mapper to trigger an I/O error. std::string schema_store_header_file_path = - GetSchemaDir() + "/schema_store_header"; - filesystem()->DeleteFile(schema_store_header_file_path.c_str()); + GetSchemaDir() + "/schema_type_mapper"; + ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively( + schema_store_header_file_path.c_str())); } { @@ -4974,6 +5030,303 @@ TEST_F(IcingSearchEngineInitializationTest, } } +class IcingSearchEngineInitializationVersionChangeTest + : public IcingSearchEngineInitializationTest, + public ::testing::WithParamInterface<version_util::VersionInfo> {}; + +TEST_P(IcingSearchEngineInitializationVersionChangeTest, + RecoverFromVersionChange) { + // TODO(b/280697513): test backup schema migration + // Test the following scenario: version change. All derived data should be + // rebuilt. We test this by manually adding some invalid derived data and + // verifying they're removed due to rebuild. 
+ SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Message") + .AddProperty(PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("indexableInteger") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("senderQualifiedId") + .SetDataTypeJoinableString( + JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + + DocumentProto person1 = + DocumentBuilder() + .SetKey("namespace", "person/1") + .SetSchema("Person") + .AddStringProperty("name", "person") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto person2 = + DocumentBuilder() + .SetKey("namespace", "person/2") + .SetSchema("Person") + .AddStringProperty("name", "person") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto message = + DocumentBuilder() + .SetKey("namespace", "message") + .SetSchema("Message") + .AddStringProperty("body", "correct message") + .AddInt64Property("indexableInteger", 123) + .AddStringProperty("senderQualifiedId", "namespace#person/1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + + { + // Initializes folder and schema, index person1 and person2 + TestIcingSearchEngine icing( + GetDefaultIcingOptions(), std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), + GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk()); + 
EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk()); + } // This should shut down IcingSearchEngine and persist anything it needs to + + { + // Manually: + // - Put message into DocumentStore + // - But add some incorrect data for message into 3 indices + // - Change version file + // + // These will make sure last_added_document_id is consistent with + // last_stored_document_id, so if Icing didn't handle version change + // correctly, then the index won't be rebuilt. + FakeClock fake_clock; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); + + // Put message into DocumentStore + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock, + schema_store.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, document_store->Put(message)); + + // Index doc_id with incorrect data + Index::Options options(GetIndexDir(), /*index_merge_size=*/1024 * 1024); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<Index> index, + Index::Create(options, filesystem(), icing_filesystem())); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerIndex> integer_index, + IntegerIndex::Create(*filesystem(), GetIntegerIndexDir())); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index, + QualifiedIdTypeJoinableIndex::Create(*filesystem(), + GetQualifiedIdJoinIndexDir())); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<StringSectionIndexingHandler> + string_section_indexing_handler, + StringSectionIndexingHandler::Create(&fake_clock, normalizer_.get(), + index.get())); + 
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler> + integer_section_indexing_handler, + IntegerSectionIndexingHandler::Create( + &fake_clock, integer_index.get())); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<QualifiedIdJoinIndexingHandler> + qualified_id_joinable_property_indexing_handler, + QualifiedIdJoinIndexingHandler::Create(&fake_clock, + qualified_id_join_index.get())); + std::vector<std::unique_ptr<DataIndexingHandler>> handlers; + handlers.push_back(std::move(string_section_indexing_handler)); + handlers.push_back(std::move(integer_section_indexing_handler)); + handlers.push_back( + std::move(qualified_id_joinable_property_indexing_handler)); + IndexProcessor index_processor(std::move(handlers), &fake_clock); + + DocumentProto incorrect_message = + DocumentBuilder() + .SetKey("namespace", "message") + .SetSchema("Message") + .AddStringProperty("body", "wrong message") + .AddInt64Property("indexableInteger", 456) + .AddStringProperty("senderQualifiedId", "namespace#person/2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document, + TokenizedDocument::Create(schema_store.get(), lang_segmenter_.get(), + std::move(incorrect_message))); + ICING_ASSERT_OK(index_processor.IndexDocument(tokenized_document, doc_id)); + + // Change existing data's version file + const version_util::VersionInfo& existing_version_info = GetParam(); + ICING_ASSERT_OK(version_util::WriteVersion( + *filesystem(), GetVersionFilename(), existing_version_info)); + } + + // Mock filesystem to observe and check the behavior of all indices. + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::make_unique<FakeClock>(), GetTestJniCache()); + InitializeResultProto initialize_result = icing.Initialize(); + EXPECT_THAT(initialize_result.status(), ProtoIsOk()); + // Index Restoration should be triggered here. 
Incorrect data should be + // deleted and correct data of message should be indexed. + EXPECT_THAT( + initialize_result.initialize_stats().document_store_recovery_cause(), + Eq(InitializeStatsProto::VERSION_CHANGED)); + EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), + Eq(InitializeStatsProto::VERSION_CHANGED)); + EXPECT_THAT( + initialize_result.initialize_stats().integer_index_restoration_cause(), + Eq(InitializeStatsProto::VERSION_CHANGED)); + EXPECT_THAT(initialize_result.initialize_stats() + .qualified_id_join_index_restoration_cause(), + Eq(InitializeStatsProto::VERSION_CHANGED)); + + // Manually check version file + ICING_ASSERT_OK_AND_ASSIGN( + version_util::VersionInfo version_info_after_init, + version_util::ReadVersion(*filesystem(), GetVersionFilename(), + GetIndexDir())); + EXPECT_THAT(version_info_after_init.version, Eq(version_util::kVersion)); + EXPECT_THAT(version_info_after_init.max_version, + Eq(std::max(version_util::kVersion, GetParam().max_version))); + + SearchResultProto expected_search_result_proto; + expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_search_result_proto.mutable_results()->Add()->mutable_document() = + message; + + // Verify term search + SearchSpecProto search_spec1; + search_spec1.set_query("body:correct"); + search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); + SearchResultProto search_result_proto1 = + icing.Search(search_spec1, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + // Verify numeric (integer) search + SearchSpecProto search_spec2; + search_spec2.set_query("indexableInteger == 123"); + search_spec2.set_search_type( + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); + search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); + + SearchResultProto search_result_proto2 = 
icing.Search(search_spec2, ScoringSpecProto::default_instance(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores( + expected_search_result_proto)); + + // Verify join search: join a query for `name:person` with a child query for + // `body:message` based on the child's `senderQualifiedId` field. + SearchSpecProto search_spec3; + search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); + search_spec3.set_query("name:person"); + JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); + join_spec->set_parent_property_expression( + std::string(JoinProcessor::kQualifiedIdExpr)); + join_spec->set_child_property_expression("senderQualifiedId"); + join_spec->set_aggregation_scoring_strategy( + JoinSpecProto::AggregationScoringStrategy::COUNT); + JoinSpecProto::NestedSpecProto* nested_spec = + join_spec->mutable_nested_spec(); + SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); + nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); + nested_search_spec->set_query("body:message"); + *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); + *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); + + ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); + result_spec3.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + + SearchResultProto expected_join_search_result_proto; + expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK); + // Person 1 with message + SearchResultProto::ResultProto* result_proto = + expected_join_search_result_proto.mutable_results()->Add(); + *result_proto->mutable_document() = person1; + *result_proto->mutable_joined_results()->Add()->mutable_document() = message; + // Person 2 without children + *expected_join_search_result_proto.mutable_results() + ->Add() + ->mutable_document() = person2; + + SearchResultProto 
search_result_proto3 = icing.Search( + search_spec3, ScoringSpecProto::default_instance(), result_spec3); + EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores( + expected_join_search_result_proto)); +} + +INSTANTIATE_TEST_SUITE_P( + IcingSearchEngineInitializationVersionChangeTest, + IcingSearchEngineInitializationVersionChangeTest, + testing::Values( + // Manually change existing data set's version to kVersion + 1. When + // initializing, it will detect "rollback". + version_util::VersionInfo( + /*version_in=*/version_util::kVersion + 1, + /*max_version_in=*/version_util::kVersion + 1), + + // Manually change existing data set's version to kVersion - 1 and + // max_version to kVersion - 1. When initializing, it will detect + // "upgrade". + version_util::VersionInfo( + /*version_in=*/version_util::kVersion - 1, + /*max_version_in=*/version_util::kVersion - 1), + + // Manually change existing data set's version to kVersion - 1 and + // max_version to kVersion. When initializing, it will detect "roll + // forward". + version_util::VersionInfo( + /*version_in=*/version_util::kVersion - 1, + /*max_version_in=*/version_util::kVersion), + + // Manually change existing data set's version to 0 and max_version to + // 0. When initializing, it will detect "version 0 upgrade". + // + // Note: in reality, version 0 won't be written into the version file, but + // it is ok here since it is a hack to simulate version 0 situation. + version_util::VersionInfo( + /*version_in=*/0, + /*max_version_in=*/0), + + // Manually change existing data set's version to 0 and max_version to + // kVersion. When initializing, it will detect "version 0 roll forward". + // + // Note: in reality, version 0 won't be written into the version file, but + // it is ok here since it is a hack to simulate version 0 situation. 
+ version_util::VersionInfo( + /*version_in=*/0, + /*max_version_in=*/version_util::kVersion))); + } // namespace } // namespace lib } // namespace icing diff --git a/icing/icing-search-engine_optimize_test.cc b/icing/icing-search-engine_optimize_test.cc index 48fae13..3127171 100644 --- a/icing/icing-search-engine_optimize_test.cc +++ b/icing/icing-search-engine_optimize_test.cc @@ -897,7 +897,6 @@ TEST_F(IcingSearchEngineOptimizeTest, search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec.set_query("name:person"); JoinSpecProto* join_spec = search_spec.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -911,6 +910,10 @@ TEST_F(IcingSearchEngineOptimizeTest, *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); + ResultSpecProto result_spec = ResultSpecProto::default_instance(); + result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + // Person1 is going to be deleted below. Only person2 which is joined with // message3 should match the query. SearchResultProto expected_search_result_proto; @@ -935,8 +938,7 @@ TEST_F(IcingSearchEngineOptimizeTest, // Validates that join search query works right after Optimize() SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } // Destroys IcingSearchEngine to make sure nothing is cached. 
@@ -945,8 +947,7 @@ TEST_F(IcingSearchEngineOptimizeTest, EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } @@ -1020,7 +1021,6 @@ TEST_F(IcingSearchEngineOptimizeTest, search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec.set_query("name:person"); JoinSpecProto* join_spec = search_spec.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -1034,6 +1034,10 @@ TEST_F(IcingSearchEngineOptimizeTest, *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); + ResultSpecProto result_spec = ResultSpecProto::default_instance(); + result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + // Message1 and message3 are going to be deleted below. Both person1 and // person2 should be included even though person2 has no child (since we're // doing left join). @@ -1064,8 +1068,7 @@ TEST_F(IcingSearchEngineOptimizeTest, // Validates that join search query works right after Optimize() SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } // Destroys IcingSearchEngine to make sure nothing is cached. 
@@ -1074,8 +1077,7 @@ TEST_F(IcingSearchEngineOptimizeTest, EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); SearchResultProto search_result_proto = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } @@ -1207,7 +1209,6 @@ TEST_F(IcingSearchEngineOptimizeTest, search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec3.set_query("name:person"); JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -1221,6 +1222,10 @@ TEST_F(IcingSearchEngineOptimizeTest, *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); + ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); + result_spec3.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + SearchResultProto expected_join_search_result_proto; expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK); SearchResultProto::ResultProto* result_proto = @@ -1230,8 +1235,7 @@ TEST_F(IcingSearchEngineOptimizeTest, *result_proto->mutable_joined_results()->Add()->mutable_document() = message1; SearchResultProto search_result_proto3 = - icing.Search(search_spec3, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + icing.Search(search_spec3, GetDefaultScoringSpec(), result_spec3); EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores( expected_join_search_result_proto)); } diff --git a/icing/icing-search-engine_schema_test.cc b/icing/icing-search-engine_schema_test.cc index 7081ba2..2609cce 100644 --- 
a/icing/icing-search-engine_schema_test.cc +++ b/icing/icing-search-engine_schema_test.cc @@ -1157,6 +1157,10 @@ TEST_F(IcingSearchEngineSchemaTest, EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); + ResultSpecProto result_spec = ResultSpecProto::default_instance(); + result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + // Verify join search: join a query for `name:person` with a child query for // `subject:message` based on the child's `receiverQualifiedId` field. // Since "receiverQualifiedId" is not JOINABLE_VALUE_TYPE_QUALIFIED_ID, @@ -1166,7 +1170,6 @@ TEST_F(IcingSearchEngineSchemaTest, search_spec_join_by_receiver.set_query("name:person"); search_spec_join_by_receiver.set_term_match_type(TermMatchType::EXACT_ONLY); JoinSpecProto* join_spec = search_spec_join_by_receiver.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("receiverQualifiedId"); @@ -1189,9 +1192,8 @@ TEST_F(IcingSearchEngineSchemaTest, *expected_empty_child_search_result_proto.mutable_results() ->Add() ->mutable_document() = person1; - SearchResultProto actual_results = - icing.Search(search_spec_join_by_receiver, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + SearchResultProto actual_results = icing.Search( + search_spec_join_by_receiver, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( expected_empty_child_search_result_proto)); @@ -1214,9 +1216,8 @@ TEST_F(IcingSearchEngineSchemaTest, *expected_join_by_sender_search_result_proto.mutable_results() ->Add() ->mutable_document() = person1; - actual_results = - icing.Search(search_spec_join_by_sender, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + actual_results = 
icing.Search(search_spec_join_by_sender, + GetDefaultScoringSpec(), result_spec); EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( expected_join_by_sender_search_result_proto)); @@ -1259,9 +1260,8 @@ TEST_F(IcingSearchEngineSchemaTest, *expected_join_by_receiver_search_result_proto.mutable_results() ->Add() ->mutable_document() = person2; - actual_results = - icing.Search(search_spec_join_by_receiver, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + actual_results = icing.Search(search_spec_join_by_receiver, + GetDefaultScoringSpec(), result_spec); EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( expected_join_by_receiver_search_result_proto)); @@ -1269,13 +1269,98 @@ TEST_F(IcingSearchEngineSchemaTest, // Verify join search: join a query for `name:person` with a child query for // `subject:message` based on the child's `senderQualifiedId` field. We should // get the same set of result since `senderQualifiedId` is unchanged. - actual_results = - icing.Search(search_spec_join_by_sender, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + actual_results = icing.Search(search_spec_join_by_sender, + GetDefaultScoringSpec(), result_spec); EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( expected_join_by_sender_search_result_proto)); } +TEST_F(IcingSearchEngineSchemaTest, + SetSchemaWithValidCycle_circularSchemaDefinitionNotAllowedFails) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_allow_circular_schema_definitions(false); + IcingSearchEngine icing(options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Create schema with circular type definitions: A <-> B + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true))) + 
.AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/false))) + .Build(); + + EXPECT_THAT( + icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false) + .status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + +TEST_F(IcingSearchEngineSchemaTest, + SetSchemaWithValidCycle_allowCircularSchemaDefinitionsOK) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_allow_circular_schema_definitions(true); + IcingSearchEngine icing(options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Create schema with valid circular type definitions: A <-> B, B->A sets + // index_nested_properties=false + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true))) + .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/false))) + .Build(); + + EXPECT_THAT( + icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false) + .status(), + ProtoStatusIs(StatusProto::OK)); +} + +TEST_F(IcingSearchEngineSchemaTest, + SetSchemaWithInvalidCycle_allowCircularSchemaDefinitionsFails) { + IcingSearchEngineOptions options = GetDefaultIcingOptions(); + options.set_allow_circular_schema_definitions(true); + IcingSearchEngine icing(options, GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // Create schema with invalid circular type definitions: A <-> B, all edges + // set index_nested_properties=true + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("A").AddProperty( + 
PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true))) + .AddType(SchemaTypeConfigBuilder().SetType("B").AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/true))) + .Build(); + + EXPECT_THAT( + icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false) + .status(), + ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); +} + TEST_F( IcingSearchEngineSchemaTest, ForceSetSchemaIndexedPropertyDeletionTriggersIndexRestorationAndReturnsOk) { @@ -1500,7 +1585,6 @@ TEST_F(IcingSearchEngineSchemaTest, search_spec.set_query("name:person"); search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); JoinSpecProto* join_spec = search_spec.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -1514,9 +1598,12 @@ TEST_F(IcingSearchEngineSchemaTest, *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); + ResultSpecProto result_spec = ResultSpecProto::default_instance(); + result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); @@ -1568,8 +1655,8 @@ TEST_F(IcingSearchEngineSchemaTest, // Verify join search: join a query for `name:person` with a child query for // `subject:tps` based on the child's `senderQualifiedId` field. We should // still be able to join person and email documents by this property. 
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } @@ -1800,7 +1887,6 @@ TEST_F( search_spec.set_query("name:person"); search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); JoinSpecProto* join_spec = search_spec.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("senderQualifiedId"); @@ -1814,9 +1900,12 @@ TEST_F( *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); + ResultSpecProto result_spec = ResultSpecProto::default_instance(); + result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); + SearchResultProto actual_results = - icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); @@ -1870,8 +1959,8 @@ TEST_F( // Verify join search: join a query for `name:person` with a child query for // `subject:tps` based on the child's `senderQualifiedId` field. We should // still be able to join person and email documents by this property. 
- actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); } diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc index 63fb657..cada6c7 100644 --- a/icing/icing-search-engine_search_test.cc +++ b/icing/icing-search-engine_search_test.cc @@ -1118,6 +1118,99 @@ TEST_P(IcingSearchEngineSearchTest, SearchResultShouldBeRankedByDocumentScore) { expected_search_result_proto)); } +TEST_P(IcingSearchEngineSearchTest, SearchWorksForNestedSubtypeDocument) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Artist") + .AddParentType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("Company").AddProperty( + PropertyConfigBuilder() + .SetName("employee") + .SetDataTypeDocument("Person", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + // Create a company with a person and an artist. 
+ DocumentProto document_company = + DocumentBuilder() + .SetKey("namespace", "uri") + .SetCreationTimestampMs(1000) + .SetSchema("Company") + .AddDocumentProperty("employee", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "name_person") + .Build(), + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Artist") + .AddStringProperty("name", "name_artist") + .AddStringProperty("emailAddress", "email") + .Build()) + .Build(); + ASSERT_THAT(icing.Put(document_company).status(), ProtoIsOk()); + + SearchResultProto company_search_result_proto; + company_search_result_proto.mutable_status()->set_code(StatusProto::OK); + *company_search_result_proto.mutable_results()->Add()->mutable_document() = + document_company; + + SearchResultProto empty_search_result_proto; + empty_search_result_proto.mutable_status()->set_code(StatusProto::OK); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_search_type(GetParam()); + + // "name_person" should match the company. + search_spec.set_query("name_person"); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + company_search_result_proto)); + + // "name_artist" should match the company. + search_spec.set_query("name_artist"); + search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + company_search_result_proto)); + + // "email" should not match the company even though the artist has a matched + // property. 
This is because the "employee" property is defined as Person + // type, and indexing on document properties should be based on defined types, + // instead of subtypes. + search_spec.set_query("email"); + search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( + empty_search_result_proto)); +} + TEST_P(IcingSearchEngineSearchTest, SearchShouldAllowNoScoring) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); @@ -3472,7 +3565,6 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) { // JoinSpec JoinSpecProto* join_spec = search_spec.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("personQualifiedId"); @@ -3496,6 +3588,8 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) { // Parent ResultSpec ResultSpecProto result_spec; result_spec.set_num_per_page(1); + result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); // Since we: // - Use MAX for aggregation scoring strategy. @@ -4050,7 +4144,6 @@ TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedId) { // JoinSpec JoinSpecProto* join_spec = search_spec.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("personQualifiedId"); @@ -4071,6 +4164,8 @@ TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedId) { // Parent ResultSpec ResultSpecProto result_spec; result_spec.set_num_per_page(1); + result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); // Since we: // - Use MAX for aggregation scoring strategy. 
@@ -4123,6 +4218,346 @@ TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedId) { EqualsSearchResultIgnoreStatsAndScores(expected_result3)); } +TEST_P(IcingSearchEngineSearchTest, + JoinShouldLimitNumChildDocumentsByMaxJoinedChildPerParent) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("firstName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("lastName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("personQualifiedId") + .SetDataTypeJoinableString( + JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + DocumentProto person1 = + DocumentBuilder() + .SetKey("pkg$db/namespace", "person1") + .SetSchema("Person") + .AddStringProperty("firstName", "first1") + .AddStringProperty("lastName", "last1") + .AddStringProperty("emailAddress", "email1@gmail.com") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(1) + .Build(); + DocumentProto person2 = + DocumentBuilder() + .SetKey("pkg$db/namespace", "person2") + .SetSchema("Person") + .AddStringProperty("firstName", "first2") + .AddStringProperty("lastName", "last2") + .AddStringProperty("emailAddress", "email2@gmail.com") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(2) + .Build(); + + DocumentProto email1 = + DocumentBuilder() + .SetKey("namespace", "email1") + 
.SetSchema("Email") + .AddStringProperty("subject", "test subject 1") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(100) + .Build(); + DocumentProto email2 = + DocumentBuilder() + .SetKey("namespace", "email2") + .SetSchema("Email") + .AddStringProperty("subject", "test subject 2") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(99) + .Build(); + DocumentProto email3 = + DocumentBuilder() + .SetKey("namespace", "email3") + .SetSchema("Email") + .AddStringProperty("subject", "test subject 3") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(98) + .Build(); + DocumentProto email4 = + DocumentBuilder() + .SetKey("namespace", "email4") + .SetSchema("Email") + .AddStringProperty("subject", "test subject 4") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(97) + .Build(); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email4).status(), ProtoIsOk()); + + // Parent SearchSpec + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("firstName:first"); + search_spec.set_search_type(GetParam()); + + // JoinSpec + JoinSpecProto* join_spec = search_spec.mutable_join_spec(); + join_spec->set_parent_property_expression( + 
std::string(JoinProcessor::kQualifiedIdExpr)); + join_spec->set_child_property_expression("personQualifiedId"); + join_spec->set_aggregation_scoring_strategy( + JoinSpecProto::AggregationScoringStrategy::COUNT); + JoinSpecProto::NestedSpecProto* nested_spec = + join_spec->mutable_nested_spec(); + SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); + nested_search_spec->set_term_match_type(TermMatchType::PREFIX); + nested_search_spec->set_query("subject:test"); + nested_search_spec->set_search_type(GetParam()); + *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); + *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); + + // Parent ScoringSpec + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + + // Parent ResultSpec with max_joined_children_per_parent_to_return = 2 + ResultSpecProto result_spec; + result_spec.set_num_per_page(1); + result_spec.set_max_joined_children_per_parent_to_return(2); + + // - Use COUNT for aggregation scoring strategy. + // - max_joined_children_per_parent_to_return = 2. + // - (Default) use DESC as the ranking order. + // + // person2 should have the highest aggregated score (3) since email2, email3, + // email4 are joined to it and the COUNT aggregated score is 3. However, only + // email2 and email3 should be attached to person2 due to + // max_joined_children_per_parent_to_return limitation in result_spec. + // person1 should be the second (aggregated score = 1). 
+ SearchResultProto::ResultProto expected_result_proto1; + *expected_result_proto1.mutable_document() = person2; + expected_result_proto1.set_score(3); + SearchResultProto::ResultProto* child_result_proto1 = + expected_result_proto1.mutable_joined_results()->Add(); + *child_result_proto1->mutable_document() = email2; + child_result_proto1->set_score(99); + SearchResultProto::ResultProto* child_result_proto2 = + expected_result_proto1.mutable_joined_results()->Add(); + *child_result_proto2->mutable_document() = email3; + child_result_proto2->set_score(98); + + SearchResultProto::ResultProto expected_result_proto2; + *expected_result_proto2.mutable_document() = person1; + expected_result_proto2.set_score(1); + SearchResultProto::ResultProto* child_result_proto3 = + expected_result_proto2.mutable_joined_results()->Add(); + *child_result_proto3->mutable_document() = email1; + child_result_proto3->set_score(100); + + SearchResultProto result1 = + icing.Search(search_spec, scoring_spec, result_spec); + uint64_t next_page_token = result1.next_page_token(); + EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken)); + EXPECT_THAT(result1.results(), + ElementsAre(EqualsProto(expected_result_proto1))); + + SearchResultProto result2 = icing.GetNextPage(next_page_token); + next_page_token = result2.next_page_token(); + EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken)); + EXPECT_THAT(result2.results(), + ElementsAre(EqualsProto(expected_result_proto2))); +} + +TEST_P(IcingSearchEngineSearchTest, JoinWithZeroMaxJoinedChildPerParent) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("firstName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("lastName") + .SetDataTypeString(TERM_MATCH_PREFIX, + 
TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("personQualifiedId") + .SetDataTypeJoinableString( + JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + DocumentProto person1 = + DocumentBuilder() + .SetKey("pkg$db/namespace", "person1") + .SetSchema("Person") + .AddStringProperty("firstName", "first1") + .AddStringProperty("lastName", "last1") + .AddStringProperty("emailAddress", "email1@gmail.com") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(1) + .Build(); + DocumentProto person2 = + DocumentBuilder() + .SetKey("pkg$db/namespace", "person2") + .SetSchema("Person") + .AddStringProperty("firstName", "first2") + .AddStringProperty("lastName", "last2") + .AddStringProperty("emailAddress", "email2@gmail.com") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(2) + .Build(); + + DocumentProto email1 = + DocumentBuilder() + .SetKey("namespace", "email1") + .SetSchema("Email") + .AddStringProperty("subject", "test subject 1") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(100) + .Build(); + DocumentProto email2 = + DocumentBuilder() + .SetKey("namespace", "email2") + .SetSchema("Email") + .AddStringProperty("subject", "test subject 2") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(99) + .Build(); + DocumentProto email3 = + DocumentBuilder() + .SetKey("namespace", "email3") + 
.SetSchema("Email") + .AddStringProperty("subject", "test subject 3") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(98) + .Build(); + DocumentProto email4 = + DocumentBuilder() + .SetKey("namespace", "email4") + .SetSchema("Email") + .AddStringProperty("subject", "test subject 4") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(97) + .Build(); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(person1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(person2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email4).status(), ProtoIsOk()); + + // Parent SearchSpec + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("firstName:first"); + search_spec.set_search_type(GetParam()); + + // JoinSpec + JoinSpecProto* join_spec = search_spec.mutable_join_spec(); + join_spec->set_parent_property_expression( + std::string(JoinProcessor::kQualifiedIdExpr)); + join_spec->set_child_property_expression("personQualifiedId"); + join_spec->set_aggregation_scoring_strategy( + JoinSpecProto::AggregationScoringStrategy::COUNT); + JoinSpecProto::NestedSpecProto* nested_spec = + join_spec->mutable_nested_spec(); + SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); + nested_search_spec->set_term_match_type(TermMatchType::PREFIX); + nested_search_spec->set_query("subject:test"); + nested_search_spec->set_search_type(GetParam()); + *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); + 
*nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); + + // Parent ScoringSpec + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + + // Parent ResultSpec with max_joined_children_per_parent_to_return = 0 + ResultSpecProto result_spec; + result_spec.set_num_per_page(1); + result_spec.set_max_joined_children_per_parent_to_return(0); + + // - Use COUNT for aggregation scoring strategy. + // - max_joined_children_per_parent_to_return = 0. + // - (Default) use DESC as the ranking order. + // + // person2 should have the highest aggregated score (3) since email2, email3, + // email4 are joined to it and the COUNT aggregated score is 3. However, no + // child documents should be attached to person2 due to + // max_joined_children_per_parent_to_return limitation in result_spec. + // person1 should be the second (aggregated score = 1) with no attached child + // documents. + SearchResultProto::ResultProto expected_result_proto1; + *expected_result_proto1.mutable_document() = person2; + expected_result_proto1.set_score(3); + + SearchResultProto::ResultProto expected_result_proto2; + *expected_result_proto2.mutable_document() = person1; + expected_result_proto2.set_score(1); + + SearchResultProto result1 = + icing.Search(search_spec, scoring_spec, result_spec); + uint64_t next_page_token = result1.next_page_token(); + EXPECT_THAT(next_page_token, Ne(kInvalidNextPageToken)); + EXPECT_THAT(result1.results(), + ElementsAre(EqualsProto(expected_result_proto1))); + + SearchResultProto result2 = icing.GetNextPage(next_page_token); + next_page_token = result2.next_page_token(); + EXPECT_THAT(next_page_token, Eq(kInvalidNextPageToken)); + EXPECT_THAT(result2.results(), + ElementsAre(EqualsProto(expected_result_proto2))); +} + TEST_P(IcingSearchEngineSearchTest, JoinSnippet) { SchemaProto schema = SchemaBuilder() @@ -4192,7 +4627,6 @@ TEST_P(IcingSearchEngineSearchTest, JoinSnippet) { // JoinSpec JoinSpecProto* 
join_spec = search_spec.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("personQualifiedId"); @@ -4217,6 +4651,8 @@ TEST_P(IcingSearchEngineSearchTest, JoinSnippet) { // Parent ResultSpec (without snippet) ResultSpecProto result_spec; result_spec.set_num_per_page(1); + result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); SearchResultProto result = icing.Search(search_spec, scoring_spec, result_spec); @@ -4315,7 +4751,6 @@ TEST_P(IcingSearchEngineSearchTest, JoinProjection) { // JoinSpec JoinSpecProto* join_spec = search_spec.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("personQualifiedId"); @@ -4341,6 +4776,8 @@ TEST_P(IcingSearchEngineSearchTest, JoinProjection) { // Parent ResultSpec (with projection) ResultSpecProto result_spec; result_spec.set_num_per_page(1); + result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); type_property_mask = result_spec.add_type_property_masks(); type_property_mask->set_schema_type("Person"); type_property_mask->add_paths("emailAddress"); @@ -4518,7 +4955,6 @@ TEST_F(IcingSearchEngineSearchTest, JoinWithAdvancedScoring) { // JoinSpec JoinSpecProto* join_spec = search_spec.mutable_join_spec(); - join_spec->set_max_joined_child_count(100); join_spec->set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec->set_child_property_expression("personQualifiedId"); @@ -4533,6 +4969,8 @@ TEST_F(IcingSearchEngineSearchTest, JoinWithAdvancedScoring) { // Parent ResultSpec ResultSpecProto result_spec; result_spec.set_num_per_page(1); + result_spec.set_max_joined_children_per_parent_to_return( + 
std::numeric_limits<int32_t>::max()); SearchResultProto results = icing.Search(search_spec, parent_scoring_spec, result_spec); diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc index 1340ebb..ddb83a8 100644 --- a/icing/icing-search-engine_test.cc +++ b/icing/icing-search-engine_test.cc @@ -428,6 +428,408 @@ TEST_F(IcingSearchEngineTest, EqualsProto(expected_get_result_proto)); } +TEST_F(IcingSearchEngineTest, GetDocumentProjectionPolymorphism) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Artist") + .AddParentType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("company") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + // Add a person document and an artist document + DocumentProto document_person = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "shopgirl@aol.com") + .Build(); + DocumentProto 
document_artist = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Artist") + .AddStringProperty("name", "Meg Artist") + .AddStringProperty("emailAddress", "artist@aol.com") + .AddStringProperty("company", "aol") + .Build(); + ASSERT_THAT(icing.Put(document_person).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document_artist).status(), ProtoIsOk()); + + // Add type property masks + GetResultSpecProto result_spec; + TypePropertyMask* person_type_property_mask = + result_spec.add_type_property_masks(); + person_type_property_mask->set_schema_type("Person"); + person_type_property_mask->add_paths("name"); + // Since Artist is a child type of Person, the TypePropertyMask for Person + // will be merged to Artist's TypePropertyMask by polymorphism, so that 'name' + // will also show in Artist's projection results. + TypePropertyMask* artist_type_property_mask = + result_spec.add_type_property_masks(); + artist_type_property_mask->set_schema_type("Artist"); + artist_type_property_mask->add_paths("emailAddress"); + + // Verify that the returned person document only contains the 'name' property, + // and the returned artist document contain both the 'name' and 'emailAddress' + // properties. 
+ GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .Build(); + ASSERT_THAT(icing.Get("namespace", "uri1", result_spec), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Artist") + .AddStringProperty("name", "Meg Artist") + .AddStringProperty("emailAddress", "artist@aol.com") + .Build(); + ASSERT_THAT(icing.Get("namespace", "uri2", result_spec), + EqualsProto(expected_get_result_proto)); +} + +TEST_F(IcingSearchEngineTest, GetDocumentProjectionMultipleParentPolymorphism) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("recipient") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Message") + .AddProperty(PropertyConfigBuilder() + .SetName("content") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("note") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("EmailMessage") + .AddParentType("Email") + .AddParentType("Message") + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + 
.SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("recipient") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("content") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("note") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + // Add an email document and a message document + DocumentProto document_email = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("sender", "sender1") + .AddStringProperty("recipient", "recipient1") + .Build(); + DocumentProto document_message = DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Message") + .AddStringProperty("content", "content1") + .AddStringProperty("note", "note1") + .Build(); + // Add an emailMessage document + DocumentProto document_email_message = + DocumentBuilder() + .SetKey("namespace", "uri3") + .SetCreationTimestampMs(1000) + .SetSchema("EmailMessage") + .AddStringProperty("sender", "sender2") + .AddStringProperty("recipient", "recipient2") + .AddStringProperty("content", "content2") + .AddStringProperty("note", "note2") + .Build(); + + ASSERT_THAT(icing.Put(document_email).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document_message).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document_email_message).status(), ProtoIsOk()); + + // Add type property masks for Email and Message, and both of them will apply + // to EmailMessage. 
+ GetResultSpecProto result_spec; + TypePropertyMask* email_type_property_mask = + result_spec.add_type_property_masks(); + email_type_property_mask->set_schema_type("Email"); + email_type_property_mask->add_paths("sender"); + + TypePropertyMask* message_type_property_mask = + result_spec.add_type_property_masks(); + message_type_property_mask->set_schema_type("Message"); + message_type_property_mask->add_paths("content"); + + // Verify that + // - The returned email document only contains the 'sender' property. + // - The returned message document only contains the 'content' property. + // - The returned email message document contains both the 'sender' and + // 'content' properties, + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("sender", "sender1") + .Build(); + ASSERT_THAT(icing.Get("namespace", "uri1", result_spec), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Message") + .AddStringProperty("content", "content1") + .Build(); + ASSERT_THAT(icing.Get("namespace", "uri2", result_spec), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = + DocumentBuilder() + .SetKey("namespace", "uri3") + .SetCreationTimestampMs(1000) + .SetSchema("EmailMessage") + .AddStringProperty("sender", "sender2") + .AddStringProperty("content", "content2") + .Build(); + ASSERT_THAT(icing.Get("namespace", "uri3", result_spec), + EqualsProto(expected_get_result_proto)); +} + +TEST_F(IcingSearchEngineTest, GetDocumentProjectionDiamondPolymorphism) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), 
ProtoIsOk()); + + // Create a schema with a diamond inheritance relation. + // Object + // / \ + // Email Message + // \ / + // EmailMessage + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Object").AddProperty( + PropertyConfigBuilder() + .SetName("objectId") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddParentType("Object") + .AddProperty(PropertyConfigBuilder() + .SetName("objectId") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("recipient") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Message") + .AddParentType("Object") + .AddProperty(PropertyConfigBuilder() + .SetName("objectId") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("content") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("note") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("EmailMessage") + .AddParentType("Email") + .AddParentType("Message") + .AddProperty(PropertyConfigBuilder() + .SetName("objectId") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + 
.AddProperty(PropertyConfigBuilder() + .SetName("recipient") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("content") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("note") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + + // Add an email document and a message document + DocumentProto document_email = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("objectId", "object1") + .AddStringProperty("sender", "sender1") + .AddStringProperty("recipient", "recipient1") + .Build(); + DocumentProto document_message = DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Message") + .AddStringProperty("objectId", "object2") + .AddStringProperty("content", "content1") + .AddStringProperty("note", "note1") + .Build(); + // Add an emailMessage document + DocumentProto document_email_message = + DocumentBuilder() + .SetKey("namespace", "uri3") + .SetCreationTimestampMs(1000) + .SetSchema("EmailMessage") + .AddStringProperty("objectId", "object3") + .AddStringProperty("sender", "sender2") + .AddStringProperty("recipient", "recipient2") + .AddStringProperty("content", "content2") + .AddStringProperty("note", "note2") + .Build(); + + ASSERT_THAT(icing.Put(document_email).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document_message).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document_email_message).status(), ProtoIsOk()); + + // Add type property masks for Object, which should apply to Email, Message + // and EmailMessage. 
+ GetResultSpecProto result_spec; + TypePropertyMask* email_type_property_mask = + result_spec.add_type_property_masks(); + email_type_property_mask->set_schema_type("Object"); + email_type_property_mask->add_paths("objectId"); + + // Verify that all the documents only contain the 'objectId' property. + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("objectId", "object1") + .Build(); + ASSERT_THAT(icing.Get("namespace", "uri1", result_spec), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Message") + .AddStringProperty("objectId", "object2") + .Build(); + ASSERT_THAT(icing.Get("namespace", "uri2", result_spec), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = + DocumentBuilder() + .SetKey("namespace", "uri3") + .SetCreationTimestampMs(1000) + .SetSchema("EmailMessage") + .AddStringProperty("objectId", "object3") + .Build(); + ASSERT_THAT(icing.Get("namespace", "uri3", result_spec), + EqualsProto(expected_get_result_proto)); +} + TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc index ee43364..1cbe00d 100644 --- a/icing/index/index-processor_benchmark.cc +++ b/icing/index/index-processor_benchmark.cc @@ -172,7 +172,9 @@ std::unique_ptr<SchemaStore> CreateSchemaStore(const Filesystem& filesystem, SchemaProto schema; CreateFakeTypeConfig(schema.add_types()); - auto set_schema_status = 
schema_store->SetSchema(schema); + auto set_schema_status = schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false); if (!set_schema_status.ok()) { ICING_LOG(ERROR) << set_schema_status.status().error_message(); diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc index 9453e58..ed9e856 100644 --- a/icing/index/index-processor_test.cc +++ b/icing/index/index-processor_test.cc @@ -40,7 +40,7 @@ #include "icing/index/numeric/numeric-index.h" #include "icing/index/string-section-indexing-handler.h" #include "icing/index/term-property-id.h" -#include "icing/join/qualified-id-joinable-property-indexing-handler.h" +#include "icing/join/qualified-id-join-indexing-handler.h" #include "icing/join/qualified-id-type-joinable-index.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/legacy/index/icing-mock-filesystem.h" @@ -267,7 +267,9 @@ class IndexProcessorTest : public Test { TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - ICING_ASSERT_OK(schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str())); ICING_ASSERT_OK_AND_ASSIGN( @@ -291,10 +293,10 @@ class IndexProcessorTest : public Test { IntegerSectionIndexingHandler::Create( &fake_clock_, integer_index_.get())); ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> + std::unique_ptr<QualifiedIdJoinIndexingHandler> qualified_id_joinable_property_indexing_handler, - QualifiedIdJoinablePropertyIndexingHandler::Create( - &fake_clock_, qualified_id_join_index_.get())); + QualifiedIdJoinIndexingHandler::Create(&fake_clock_, + qualified_id_join_index_.get())); std::vector<std::unique_ptr<DataIndexingHandler>> handlers; 
handlers.push_back(std::move(string_section_indexing_handler)); handlers.push_back(std::move(integer_section_indexing_handler)); @@ -823,10 +825,10 @@ TEST_F(IndexProcessorTest, OutOfOrderDocumentIdsInRecoveryMode) { IntegerSectionIndexingHandler::Create( &fake_clock_, integer_index_.get())); ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> + std::unique_ptr<QualifiedIdJoinIndexingHandler> qualified_id_joinable_property_indexing_handler, - QualifiedIdJoinablePropertyIndexingHandler::Create( - &fake_clock_, qualified_id_join_index_.get())); + QualifiedIdJoinIndexingHandler::Create(&fake_clock_, + qualified_id_join_index_.get())); std::vector<std::unique_ptr<DataIndexingHandler>> handlers; handlers.push_back(std::move(string_section_indexing_handler)); handlers.push_back(std::move(integer_section_indexing_handler)); diff --git a/icing/index/index.cc b/icing/index/index.cc index 5cfcd27..19edbb6 100644 --- a/icing/index/index.cc +++ b/icing/index/index.cc @@ -163,6 +163,12 @@ libtextclassifier3::StatusOr<std::unique_ptr<Index>> Index::Create( std::move(main_index), filesystem)); } +/* static */ libtextclassifier3::StatusOr<int> Index::ReadFlashIndexMagic( + const Filesystem* filesystem, const std::string& base_dir) { + return MainIndex::ReadFlashIndexMagic(filesystem, + MakeMainIndexFilepath(base_dir)); +} + libtextclassifier3::Status Index::TruncateTo(DocumentId document_id) { if (lite_index_->last_added_document_id() != kInvalidDocumentId && lite_index_->last_added_document_id() > document_id) { diff --git a/icing/index/index.h b/icing/index/index.h index 3200d70..c170278 100644 --- a/icing/index/index.h +++ b/icing/index/index.h @@ -86,6 +86,16 @@ class Index { const Options& options, const Filesystem* filesystem, const IcingFilesystem* icing_filesystem); + // Reads magic from existing flash (main) index file header. We need this + // during Icing initialization phase to determine the version. 
+ // + // Returns + // Valid magic on success + // NOT_FOUND if the flash (main) index doesn't exist + // INTERNAL on I/O error + static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic( + const Filesystem* filesystem, const std::string& base_dir); + // Clears all files created by the index. Returns OK if all files were // cleared. libtextclassifier3::Status Reset() { diff --git a/icing/index/integer-section-indexing-handler.cc b/icing/index/integer-section-indexing-handler.cc index 584f028..63b09df 100644 --- a/icing/index/integer-section-indexing-handler.cc +++ b/icing/index/integer-section-indexing-handler.cc @@ -16,12 +16,19 @@ #include <cstdint> #include <memory> +#include <utility> #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/index/numeric/numeric-index.h" +#include "icing/legacy/core/icing-string-util.h" +#include "icing/proto/logging.pb.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" +#include "icing/util/clock.h" #include "icing/util/logging.h" +#include "icing/util/status-macros.h" #include "icing/util/tokenized-document.h" namespace icing { @@ -41,7 +48,7 @@ IntegerSectionIndexingHandler::Create(const Clock* clock, libtextclassifier3::Status IntegerSectionIndexingHandler::Handle( const TokenizedDocument& tokenized_document, DocumentId document_id, bool recovery_mode, PutDocumentStatsProto* put_document_stats) { - // TODO(b/259744228): set integer indexing latency and other stats + std::unique_ptr<Timer> index_timer = clock_.GetNewTimer(); if (!IsDocumentIdValid(document_id)) { return absl_ports::InvalidArgumentError( @@ -93,6 +100,11 @@ libtextclassifier3::Status IntegerSectionIndexingHandler::Handle( } } + if (put_document_stats != nullptr) { + put_document_stats->set_integer_index_latency_ms( + index_timer->GetElapsedMilliseconds()); + } + return status; } diff --git
a/icing/index/integer-section-indexing-handler_test.cc b/icing/index/integer-section-indexing-handler_test.cc index 895fe57..706856c 100644 --- a/icing/index/integer-section-indexing-handler_test.cc +++ b/icing/index/integer-section-indexing-handler_test.cc @@ -156,7 +156,9 @@ class IntegerSectionIndexingHandlerTest : public ::testing::Test { TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - ICING_ASSERT_OK(schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ASSERT_TRUE( filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str())); diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc index 83a73a4..2c0c2c2 100644 --- a/icing/index/iterator/doc-hit-info-iterator-filter.cc +++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc @@ -55,11 +55,16 @@ DocHitInfoIteratorFilter::DocHitInfoIteratorFilter( // Precompute all the SchemaTypeIds for (std::string_view schema_type : options_.schema_types) { - auto schema_type_id_or = schema_store_.GetSchemaTypeId(schema_type); + libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*> + schema_type_ids_or = + schema_store_.GetSchemaTypeIdsWithChildren(schema_type); // If we can't find the SchemaTypeId, just throw it away - if (schema_type_id_or.ok()) { - target_schema_type_ids_.emplace(schema_type_id_or.ValueOrDie()); + if (schema_type_ids_or.ok()) { + const std::unordered_set<SchemaTypeId>* schema_type_ids = + schema_type_ids_or.ValueOrDie(); + target_schema_type_ids_.insert(schema_type_ids->begin(), + schema_type_ids->end()); } } } diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc index 0900e1f..4b86cae 100644 --- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc +++ 
b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc @@ -17,6 +17,7 @@ #include <limits> #include <memory> #include <string> +#include <string_view> #include <utility> #include <vector> @@ -80,7 +81,9 @@ class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - ICING_ASSERT_OK(schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -247,7 +250,9 @@ class DocHitInfoIteratorNamespaceFilterTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - ICING_ASSERT_OK(schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -379,30 +384,52 @@ TEST_F(DocHitInfoIteratorNamespaceFilterTest, FilterForMultipleNamespacesOk) { class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test { protected: + static constexpr std::string_view kSchema1 = "email"; + static constexpr std::string_view kSchema2 = "message"; + static constexpr std::string_view kSchema3 = "person"; + static constexpr std::string_view kSchema4 = "artist"; + static constexpr std::string_view kSchema5 = "emailMessage"; + DocHitInfoIteratorSchemaTypeFilterTest() : test_dir_(GetTestTempDir() + "/icing") {} void SetUp() override { filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); - document1_schema1_ = - DocumentBuilder().SetKey("namespace", "1").SetSchema(schema1_).Build(); - document2_schema2_ = - DocumentBuilder().SetKey("namespace", "2").SetSchema(schema2_).Build(); - document3_schema3_ = - 
DocumentBuilder().SetKey("namespace", "3").SetSchema(schema3_).Build(); - document4_schema1_ = - DocumentBuilder().SetKey("namespace", "4").SetSchema(schema1_).Build(); + document1_schema1_ = DocumentBuilder() + .SetKey("namespace", "1") + .SetSchema(std::string(kSchema1)) + .Build(); + document2_schema2_ = DocumentBuilder() + .SetKey("namespace", "2") + .SetSchema(std::string(kSchema2)) + .Build(); + document3_schema3_ = DocumentBuilder() + .SetKey("namespace", "3") + .SetSchema(std::string(kSchema3)) + .Build(); + document4_schema1_ = DocumentBuilder() + .SetKey("namespace", "4") + .SetSchema(std::string(kSchema1)) + .Build(); SchemaProto schema = SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType(schema1_)) - .AddType(SchemaTypeConfigBuilder().SetType(schema2_)) - .AddType(SchemaTypeConfigBuilder().SetType(schema3_)) + .AddType(SchemaTypeConfigBuilder().SetType(kSchema1)) + .AddType(SchemaTypeConfigBuilder().SetType(kSchema2)) + .AddType(SchemaTypeConfigBuilder().SetType(kSchema3)) + .AddType(SchemaTypeConfigBuilder().SetType(kSchema4).AddParentType( + kSchema3)) + .AddType(SchemaTypeConfigBuilder() + .SetType(std::string(kSchema5)) + .AddParentType(kSchema1) + .AddParentType(kSchema2)) .Build(); ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - ICING_ASSERT_OK(schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -424,9 +451,6 @@ class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test { FakeClock fake_clock_; const Filesystem filesystem_; const std::string test_dir_; - const std::string schema1_ = "email"; - const std::string schema2_ = "message"; - const std::string schema3_ = "person"; DocumentProto document1_schema1_; DocumentProto document2_schema2_; DocumentProto 
document3_schema3_; @@ -495,7 +519,7 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - options_.schema_types = std::vector<std::string_view>{schema1_}; + options_.schema_types = std::vector<std::string_view>{kSchema1}; DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), document_store_.get(), schema_store_.get(), options_); @@ -518,7 +542,7 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, FilterForMultipleSchemaTypesOk) { std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); - options_.schema_types = std::vector<std::string_view>{schema2_, schema3_}; + options_.schema_types = std::vector<std::string_view>{kSchema2, kSchema3}; DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator), document_store_.get(), schema_store_.get(), options_); @@ -527,6 +551,110 @@ TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, FilterForMultipleSchemaTypesOk) { ElementsAre(document_id2, document_id3)); } +TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, + FilterForSchemaTypePolymorphismOk) { + // Add some irrelevant documents. + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document1_schema1_)); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document2_schema2_)); + + // Create a person document and an artist document, where the artist should be + // able to be interpreted as a person by polymorphism. 
+ ICING_ASSERT_OK_AND_ASSIGN( + DocumentId person_document_id, + document_store_->Put(DocumentBuilder() + .SetKey("namespace", "person") + .SetSchema("person") + .Build())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId artist_document_id, + document_store_->Put(DocumentBuilder() + .SetKey("namespace", "artist") + .SetSchema("artist") + .Build())); + + std::vector<DocHitInfo> doc_hit_infos = { + DocHitInfo(document_id1), DocHitInfo(document_id2), + DocHitInfo(person_document_id), DocHitInfo(artist_document_id)}; + + // Filters for the "person" type should also include the "artist" type. + std::unique_ptr<DocHitInfoIterator> original_iterator = + std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); + options_.schema_types = {"person"}; + DocHitInfoIteratorFilter filtered_iterator_1(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); + EXPECT_THAT(GetDocumentIds(&filtered_iterator_1), + ElementsAre(person_document_id, artist_document_id)); + + // Filters for the "artist" type should not include the "person" type. + original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); + options_.schema_types = {"artist"}; + DocHitInfoIteratorFilter filtered_iterator_2(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); + EXPECT_THAT(GetDocumentIds(&filtered_iterator_2), + ElementsAre(artist_document_id)); +} + +TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, + FilterForSchemaTypeMultipleParentPolymorphismOk) { + // Create an email and a message document. 
+ ICING_ASSERT_OK_AND_ASSIGN( + DocumentId email_document_id, + document_store_->Put(DocumentBuilder() + .SetKey("namespace", "email") + .SetSchema("email") + .Build())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId message_document_id, + document_store_->Put(DocumentBuilder() + .SetKey("namespace", "message") + .SetSchema("message") + .Build())); + + // Create an emailMessage document, which should be able to be interpreted + // as both an email and a message by polymorphism. + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId email_message_document_id, + document_store_->Put(DocumentBuilder() + .SetKey("namespace", "emailMessage") + .SetSchema("emailMessage") + .Build())); + + std::vector<DocHitInfo> doc_hit_infos = { + DocHitInfo(email_document_id), DocHitInfo(message_document_id), + DocHitInfo(email_message_document_id)}; + + // Filters for the "email" type should also include the "emailMessage" type. + std::unique_ptr<DocHitInfoIterator> original_iterator = + std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); + options_.schema_types = std::vector<std::string_view>{"email"}; + DocHitInfoIteratorFilter filtered_iterator_1(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); + EXPECT_THAT(GetDocumentIds(&filtered_iterator_1), + ElementsAre(email_document_id, email_message_document_id)); + + // Filters for the "message" type should also include the "emailMessage" type. + original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); + options_.schema_types = std::vector<std::string_view>{"message"}; + DocHitInfoIteratorFilter filtered_iterator_2(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); + EXPECT_THAT(GetDocumentIds(&filtered_iterator_2), + ElementsAre(message_document_id, email_message_document_id)); + + // Filters for an irrelevant type should return nothing.
+ original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); + options_.schema_types = std::vector<std::string_view>{"person"}; + DocHitInfoIteratorFilter filtered_iterator_3(std::move(original_iterator), + document_store_.get(), + schema_store_.get(), options_); + EXPECT_THAT(GetDocumentIds(&filtered_iterator_3), IsEmpty()); +} + class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test { protected: DocHitInfoIteratorExpirationFilterTest() @@ -542,7 +670,9 @@ class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - ICING_ASSERT_OK(schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -742,7 +872,9 @@ class DocHitInfoIteratorFilterTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - ICING_ASSERT_OK(schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc new file mode 100644 index 0000000..5f260a8 --- /dev/null +++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.cc @@ -0,0 +1,114 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h" + +#include <cstdint> +#include <memory> +#include <string> +#include <string_view> +#include <utility> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" +#include "icing/index/hit/doc-hit-info.h" +#include "icing/index/iterator/doc-hit-info-iterator.h" +#include "icing/schema/schema-store.h" +#include "icing/store/document-id.h" +#include "icing/store/document-store.h" + +namespace icing { +namespace lib { + +DocHitInfoIteratorPropertyInSchema::DocHitInfoIteratorPropertyInSchema( + std::unique_ptr<DocHitInfoIterator> delegate, + const DocumentStore* document_store, const SchemaStore* schema_store, + std::set<std::string> target_sections) + : delegate_(std::move(delegate)), + document_store_(*document_store), + schema_store_(*schema_store), + target_properties_(std::move(target_sections)) {} + +libtextclassifier3::Status DocHitInfoIteratorPropertyInSchema::Advance() { + doc_hit_info_ = DocHitInfo(kInvalidDocumentId); + hit_intersect_section_ids_mask_ = kSectionIdMaskNone; + + // Maps from SchemaTypeId to a bool indicating whether or not the type has + // the requested property. 
+ std::unordered_map<SchemaTypeId, bool> property_defined_types; + while (delegate_->Advance().ok()) { + DocumentId document_id = delegate_->doc_hit_info().document_id(); + auto data_optional = + document_store_.GetAliveDocumentFilterData(document_id); + if (!data_optional) { + // Ran into some error retrieving information on this hit, skip + continue; + } + + // Guaranteed that the DocumentFilterData exists at this point + SchemaTypeId schema_type_id = data_optional.value().schema_type_id(); + bool valid_match = false; + auto itr = property_defined_types.find(schema_type_id); + if (itr != property_defined_types.end()) { + valid_match = itr->second; + } else { + for (const auto& property : target_properties_) { + if (schema_store_.IsPropertyDefinedInSchema(schema_type_id, property)) { + valid_match = true; + break; + } + } + property_defined_types[schema_type_id] = valid_match; + } + + if (valid_match) { + doc_hit_info_ = delegate_->doc_hit_info(); + hit_intersect_section_ids_mask_ = + delegate_->hit_intersect_section_ids_mask(); + doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_); + return libtextclassifier3::Status::OK; + } + + // The document's schema does not define any properties listed in + // target_properties_. Continue. + } + + // Didn't find anything on the delegate iterator. + return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator"); +} + +libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode> +DocHitInfoIteratorPropertyInSchema::TrimRightMostNode() && { + // Don't generate suggestion if the last operator is this custom function. 
+ return absl_ports::InvalidArgumentError( + "Cannot generate suggestion if the last term is hasPropertyDefined()."); +} + +int32_t DocHitInfoIteratorPropertyInSchema::GetNumBlocksInspected() const { + return delegate_->GetNumBlocksInspected(); +} + +int32_t DocHitInfoIteratorPropertyInSchema::GetNumLeafAdvanceCalls() const { + return delegate_->GetNumLeafAdvanceCalls(); +} + +std::string DocHitInfoIteratorPropertyInSchema::ToString() const { + return absl_ports::StrCat("(", absl_ports::StrJoin(target_properties_, ","), + "): ", delegate_->ToString()); +} + +} // namespace lib +} // namespace icing diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h new file mode 100644 index 0000000..35b87e1 --- /dev/null +++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema.h @@ -0,0 +1,76 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_ +#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_ + +#include <cstdint> +#include <memory> +#include <string> +#include <string_view> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/index/iterator/doc-hit-info-iterator.h" +#include "icing/schema/schema-store.h" +#include "icing/store/document-store.h" + +namespace icing { +namespace lib { + +// An iterator that helps filter for DocHitInfos whose schemas define at least +// one of the properties named in target_properties_. +class DocHitInfoIteratorPropertyInSchema : public DocHitInfoIterator { + public: + // Takes ownership of delegate only; document_store and schema_store must + // refer to valid objects that outlive the one constructed. The delegate + // should at minimum be a DocHitInfoIteratorAllDocumentId, but other + // optimizations are possible, cf. go/icing-property-in-schema-existence. + explicit DocHitInfoIteratorPropertyInSchema( + std::unique_ptr<DocHitInfoIterator> delegate, + const DocumentStore* document_store, const SchemaStore* schema_store, + std::set<std::string> target_sections); + + libtextclassifier3::Status Advance() override; + + libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override; + + int32_t GetNumBlocksInspected() const override; + + int32_t GetNumLeafAdvanceCalls() const override; + + std::string ToString() const override; + + void PopulateMatchedTermsStats( + std::vector<TermMatchInfo>* matched_terms_stats, + SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { + if (doc_hit_info_.document_id() == kInvalidDocumentId) { + // Current hit isn't valid, return.
+ return; + } + delegate_->PopulateMatchedTermsStats(matched_terms_stats, + filtering_section_mask); + } + + private: + std::unique_ptr<DocHitInfoIterator> delegate_; + const DocumentStore& document_store_; + const SchemaStore& schema_store_; + + std::set<std::string> target_properties_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_PROPERTY_IN_SCHEMA_H_ diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc new file mode 100644 index 0000000..9bffeeb --- /dev/null +++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc @@ -0,0 +1,263 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h" + +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/index/hit/doc-hit-info.h" +#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h" +#include "icing/index/iterator/doc-hit-info-iterator-test-util.h" +#include "icing/index/iterator/doc-hit-info-iterator.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/schema-builder.h" +#include "icing/schema/schema-store.h" +#include "icing/schema/section.h" +#include "icing/store/document-id.h" +#include "icing/store/document-store.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::IsEmpty; + +class DocHitInfoIteratorPropertyInSchemaTest : public ::testing::Test { + protected: + DocHitInfoIteratorPropertyInSchemaTest() + : test_dir_(GetTestTempDir() + "/icing") {} + + void SetUp() override { + filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); + document1_ = DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("email") + .Build(); + document2_ = + DocumentBuilder().SetKey("namespace", "uri2").SetSchema("note").Build(); + + indexed_section_0 = "indexedSection0"; + unindexed_section_1 = "unindexedSection1"; + not_defined_section_2 = "notDefinedSection2"; + + schema_ = + SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType("email") + // Add an indexed property so we generate section + // metadata on it + .AddProperty(PropertyConfigBuilder() + .SetName(indexed_section_0) + .SetDataTypeString(TERM_MATCH_EXACT, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + 
.AddProperty(PropertyConfigBuilder() + .SetName(unindexed_section_1) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("note").AddProperty( + PropertyConfigBuilder() + .SetName(unindexed_section_1) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN( + schema_store_, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + /*namespace_id_fingerprint=*/false, + PortableFileBackedProtoLog< + DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); + document_store_ = std::move(create_result.document_store); + } + + void TearDown() override { + document_store_.reset(); + schema_store_.reset(); + filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); + } + + std::unique_ptr<SchemaStore> schema_store_; + std::unique_ptr<DocumentStore> document_store_; + const Filesystem filesystem_; + const std::string test_dir_; + std::string indexed_section_0; + std::string unindexed_section_1; + std::string not_defined_section_2; + SchemaProto schema_; + DocumentProto document1_; + DocumentProto document2_; + FakeClock fake_clock_; +}; + +TEST_F(DocHitInfoIteratorPropertyInSchemaTest, + AdvanceToDocumentWithIndexedProperty) { + // Populate the DocumentStore's FilterCache with this document's data + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + document_store_->Put(document1_)); + + auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>( + document_store_->num_documents()); + + DocHitInfoIteratorPropertyInSchema property_defined_iterator( + 
std::move(original_iterator), document_store_.get(), schema_store_.get(), + /*target_target_sections=*/{indexed_section_0}); + + EXPECT_THAT(GetDocumentIds(&property_defined_iterator), + ElementsAre(document_id)); + + EXPECT_FALSE(property_defined_iterator.Advance().ok()); +} + +TEST_F(DocHitInfoIteratorPropertyInSchemaTest, + AdvanceToDocumentWithUnindexedProperty) { + // Populate the DocumentStore's FilterCache with this document's data + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + document_store_->Put(document1_)); + + auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>( + document_store_->num_documents()); + + DocHitInfoIteratorPropertyInSchema property_defined_iterator( + std::move(original_iterator), document_store_.get(), schema_store_.get(), + /*target_target_sections=*/{unindexed_section_1}); + + EXPECT_THAT(GetDocumentIds(&property_defined_iterator), + ElementsAre(document_id)); + + EXPECT_FALSE(property_defined_iterator.Advance().ok()); +} + +TEST_F(DocHitInfoIteratorPropertyInSchemaTest, NoMatchWithUndefinedProperty) { + ICING_EXPECT_OK(document_store_->Put(document1_)); + + auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>( + document_store_->num_documents()); + + DocHitInfoIteratorPropertyInSchema property_defined_iterator( + std::move(original_iterator), document_store_.get(), schema_store_.get(), + /*target_target_sections=*/{not_defined_section_2}); + EXPECT_FALSE(property_defined_iterator.Advance().ok()); +} + +TEST_F(DocHitInfoIteratorPropertyInSchemaTest, + CorrectlySetsSectionIdMasksAndPopulatesTermMatchInfo) { + // Populate the DocumentStore's FilterCache with this document's data + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + document_store_->Put(document1_)); + + // Arbitrary section ids for the documents in the DocHitInfoIterators. + // Created to test correct section_id_mask behavior. 
+ SectionIdMask original_section_id_mask = 0b00000101; // hits in sections 0, 2 + + DocHitInfoTermFrequencyPair doc_hit_info1 = DocHitInfo(document_id); + doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2); + + // Create a hit that was found in the indexed section + std::vector<DocHitInfoTermFrequencyPair> doc_hit_infos = {doc_hit_info1}; + + auto original_iterator = + std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "hi"); + original_iterator->set_hit_intersect_section_ids_mask( + original_section_id_mask); + + DocHitInfoIteratorPropertyInSchema property_defined_iterator( + std::move(original_iterator), document_store_.get(), schema_store_.get(), + /*target_target_sections=*/{indexed_section_0}); + + std::vector<TermMatchInfo> matched_terms_stats; + property_defined_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + EXPECT_THAT(matched_terms_stats, IsEmpty()); + + ICING_EXPECT_OK(property_defined_iterator.Advance()); + EXPECT_THAT(property_defined_iterator.doc_hit_info().document_id(), + Eq(document_id)); + + // The expected mask is the same as the original mask, since the iterator + // should treat it as a pass-through. 
+ SectionIdMask expected_section_id_mask = original_section_id_mask; + EXPECT_EQ(property_defined_iterator.hit_intersect_section_ids_mask(), + expected_section_id_mask); + + property_defined_iterator.PopulateMatchedTermsStats(&matched_terms_stats); + std::unordered_map<SectionId, Hit::TermFrequency> + expected_section_ids_tf_map = {{0, 1}, {2, 2}}; + EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo( + "hi", expected_section_ids_tf_map))); + + EXPECT_FALSE(property_defined_iterator.Advance().ok()); +} + +TEST_F(DocHitInfoIteratorPropertyInSchemaTest, + TrimRightMostNodeResultsInError) { + auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>( + document_store_->num_documents()); + + DocHitInfoIteratorPropertyInSchema property_defined_iterator( + std::move(original_iterator), document_store_.get(), schema_store_.get(), + /*target_target_sections=*/{indexed_section_0}); + + EXPECT_THAT(std::move(property_defined_iterator).TrimRightMostNode(), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST_F(DocHitInfoIteratorPropertyInSchemaTest, + FindPropertyDefinedByMultipleTypes) { + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document1_)); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document2_)); + auto original_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>( + document_store_->num_documents()); + + DocHitInfoIteratorPropertyInSchema property_defined_iterator( + std::move(original_iterator), document_store_.get(), schema_store_.get(), + /*target_target_sections=*/{unindexed_section_1}); + + EXPECT_THAT(GetDocumentIds(&property_defined_iterator), + ElementsAre(document_id2, document_id1)); + + EXPECT_FALSE(property_defined_iterator.Advance().ok()); +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc 
b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc index 60b9a12..78f4d34 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc @@ -95,7 +95,9 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - ICING_ASSERT_OK(schema_store_->SetSchema(schema_)); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h index e1f06d0..d8cd3ad 100644 --- a/icing/index/iterator/doc-hit-info-iterator.h +++ b/icing/index/iterator/doc-hit-info-iterator.h @@ -85,10 +85,11 @@ class DocHitInfoIterator { unnormalized_term_length_(unnormalized_term_length) {} }; - // Trim the right-most itertor of the itertor tree. - // This is to support search suggestion for the last terms which is the + // Trim the rightmost iterator of the iterator tree. + // This is to support search suggestions for the last term which is the // right-most node of the root iterator tree. Only support trim the right-most - // node on the AND, AND_NARY, OR, OR_NARY, OR_LEAF and Filter itertor. + // node on the AND, AND_NARY, OR, OR_NARY, OR_LEAF, Filter, and the + // property-in-schema-check iterator. // // After calling this method, this iterator is no longer usable. Please use // the returned iterator. 
diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc index 7df137c..d5e9d57 100644 --- a/icing/index/main/main-index.cc +++ b/icing/index/main/main-index.cc @@ -22,6 +22,7 @@ #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" #include "icing/file/destructible-directory.h" +#include "icing/file/posting_list/flash-index-storage.h" #include "icing/file/posting_list/posting-list-common.h" #include "icing/index/main/posting-list-hit-serializer.h" #include "icing/index/term-id-codec.h" @@ -90,6 +91,10 @@ FindTermResult FindShortestValidTermWithPrefixHits( return result; } +std::string MakeFlashIndexFilename(const std::string& base_dir) { + return base_dir + "/main_index"; +} + } // namespace MainIndex::MainIndex(const std::string& index_directory, @@ -112,12 +117,18 @@ libtextclassifier3::StatusOr<std::unique_ptr<MainIndex>> MainIndex::Create( return main_index; } +/* static */ libtextclassifier3::StatusOr<int> MainIndex::ReadFlashIndexMagic( + const Filesystem* filesystem, const std::string& index_directory) { + return FlashIndexStorage::ReadHeaderMagic( + filesystem, MakeFlashIndexFilename(index_directory)); +} + // TODO(b/139087650) : Migrate off of IcingFilesystem. 
libtextclassifier3::Status MainIndex::Init() { if (!filesystem_->CreateDirectoryRecursively(base_dir_.c_str())) { return absl_ports::InternalError("Unable to create main index directory."); } - std::string flash_index_file = base_dir_ + "/main_index"; + std::string flash_index_file = MakeFlashIndexFilename(base_dir_); ICING_ASSIGN_OR_RETURN( FlashIndexStorage flash_index, FlashIndexStorage::Create(flash_index_file, filesystem_, diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h index e181330..9e570d5 100644 --- a/icing/index/main/main-index.h +++ b/icing/index/main/main-index.h @@ -48,6 +48,16 @@ class MainIndex { const std::string& index_directory, const Filesystem* filesystem, const IcingFilesystem* icing_filesystem); + // Reads magic from existing flash index storage file header. We need this + // during Icing initialization phase to determine the version. + // + // RETURNS: + // - On success, a valid magic. + // - NOT_FOUND if the flash index doesn't exist. + // - INTERNAL on I/O error. + static libtextclassifier3::StatusOr<int> ReadFlashIndexMagic( + const Filesystem* filesystem, const std::string& index_directory); + // Get a PostingListHitAccessor that holds the posting list chain for 'term'. 
// // RETURNS: diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc index f3901e1..5165040 100644 --- a/icing/index/numeric/integer-index-storage.cc +++ b/icing/index/numeric/integer-index-storage.cc @@ -292,12 +292,17 @@ libtextclassifier3::Status IntegerIndexStorageIterator::Advance() { // Merge sections with same document_id into a single DocHitInfo while (!pq_.empty() && pq_.top()->GetCurrentBasicHit().document_id() == document_id) { - doc_hit_info_.UpdateSection(pq_.top()->GetCurrentBasicHit().section_id()); - BucketPostingListIterator* bucket_itr = pq_.top(); pq_.pop(); - if (bucket_itr->AdvanceAndFilter(key_lower_, key_upper_).ok()) { + libtextclassifier3::Status advance_status; + do { + doc_hit_info_.UpdateSection( + bucket_itr->GetCurrentBasicHit().section_id()); + advance_status = bucket_itr->AdvanceAndFilter(key_lower_, key_upper_); + } while (advance_status.ok() && + bucket_itr->GetCurrentBasicHit().document_id() == document_id); + if (advance_status.ok()) { pq_.push(bucket_itr); } } diff --git a/icing/index/numeric/integer-index-storage_benchmark.cc b/icing/index/numeric/integer-index-storage_benchmark.cc index 54b19c3..27f35d9 100644 --- a/icing/index/numeric/integer-index-storage_benchmark.cc +++ b/icing/index/numeric/integer-index-storage_benchmark.cc @@ -12,22 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include <algorithm> #include <cstdint> +#include <limits> #include <memory> #include <string> #include <unordered_map> +#include <utility> #include <vector> +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "testing/base/public/benchmark.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/file/destructible-directory.h" #include "icing/file/filesystem.h" +#include "icing/index/hit/doc-hit-info.h" +#include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/index/numeric/integer-index-storage.h" #include "icing/index/numeric/posting-list-integer-index-serializer.h" +#include "icing/schema/section.h" #include "icing/store/document-id.h" #include "icing/testing/common-matchers.h" +#include "icing/testing/numeric/normal-distribution-number-generator.h" #include "icing/testing/numeric/number-generator.h" #include "icing/testing/numeric/uniform-distribution-integer-generator.h" #include "icing/testing/tmp-directory.h" @@ -65,6 +73,7 @@ static constexpr int kDefaultSeed = 12345; enum DistributionTypeEnum { kUniformDistribution, + kNormalDistribution, }; class IntegerIndexStorageBenchmark { @@ -103,6 +112,19 @@ CreateIntegerGenerator(DistributionTypeEnum distribution_type, int seed, return std::make_unique<UniformDistributionIntegerGenerator<int64_t>>( seed, /*range_lower=*/0, /*range_upper=*/static_cast<int64_t>(num_keys) * 10 - 1); + case DistributionTypeEnum::kNormalDistribution: + // Normal distribution with mean = 0 and stddev = num_keys / 1024. 
+ // - keys in range [-1 * stddev, 1 * stddev]: 68.2% + // - keys in range [-2 * stddev, 2 * stddev]: 95.4% + // - keys in range [-3 * stddev, 3 * stddev]: 99.7% + // + // - When generating num_keys integers, 68.2% of them will be in range + // [-num_keys / 1024, num_keys / 1024] + // - Each number in this range will be sampled (num_keys * 0.682) / + // ((num_keys / 1024) * 2) = 349 times on average and become + // "single-range bucket". + return std::make_unique<NormalDistributionNumberGenerator<int64_t>>( + seed, /*mean=*/0.0, /*stddev=*/num_keys / 1024.0); default: return absl_ports::InvalidArgumentError("Unknown type"); } @@ -155,7 +177,18 @@ BENCHMARK(BM_Index) ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17) ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18) ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19) - ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20); + ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20); void BM_BatchIndex(benchmark::State& state) { DistributionTypeEnum distribution_type = @@ -203,7 +236,18 @@ BENCHMARK(BM_BatchIndex) ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17) ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18) 
->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19) - ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20); + ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20); void BM_ExactQuery(benchmark::State& state) { DistributionTypeEnum distribution_type = @@ -269,7 +313,81 @@ BENCHMARK(BM_ExactQuery) ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17) ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18) ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19) - ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20); + ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19) + 
->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20); + +void BM_RangeQueryAll(benchmark::State& state) { + DistributionTypeEnum distribution_type = + static_cast<DistributionTypeEnum>(state.range(0)); + int num_keys = state.range(1); + + IntegerIndexStorageBenchmark benchmark; + benchmark.filesystem.DeleteDirectoryRecursively( + benchmark.working_path.c_str()); + DestructibleDirectory ddir(&benchmark.filesystem, benchmark.working_path); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerIndexStorage> storage, + IntegerIndexStorage::Create(benchmark.filesystem, benchmark.working_path, + IntegerIndexStorage::Options(), + &benchmark.posting_list_serializer)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<NumberGenerator<int64_t>> generator, + CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys)); + for (int i = 0; i < num_keys; ++i) { + ICING_ASSERT_OK(storage->AddKeys(static_cast<DocumentId>(i), + kDefaultSectionId, + {generator->Generate()})); + } + ICING_ASSERT_OK(storage->PersistToDisk()); + + for (auto _ : state) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocHitInfoIterator> iterator, + storage->GetIterator( + /*query_key_lower=*/std::numeric_limits<int64_t>::min(), + /*query_key_upper=*/std::numeric_limits<int64_t>::max())); + std::vector<DocHitInfo> data; + while (iterator->Advance().ok()) { + data.push_back(iterator->doc_hit_info()); + } + + ASSERT_THAT(data, SizeIs(num_keys)); + } +} +BENCHMARK(BM_RangeQueryAll) + ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 10) + ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 11) + ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 12) + ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 13) + ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 14) + ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 15) + ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 16) + ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 17) 
+ ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 18) + ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 19) + ->ArgPair(DistributionTypeEnum::kUniformDistribution, 1 << 20) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 10) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 11) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 12) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 13) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 14) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 15) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 16) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 17) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 18) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 19) + ->ArgPair(DistributionTypeEnum::kNormalDistribution, 1 << 20); } // namespace diff --git a/icing/index/numeric/integer-index_test.cc b/icing/index/numeric/integer-index_test.cc index ec7f55b..92433e1 100644 --- a/icing/index/numeric/integer-index_test.cc +++ b/icing/index/numeric/integer-index_test.cc @@ -389,7 +389,10 @@ TYPED_TEST(NumericIndexIntegerTest, WildcardStorageQuery) { .AddProperty(PropertyConfigBuilder(int_property_config) .SetName("desiredProperty"))) .Build(); - ICING_ASSERT_OK(this->schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(this->schema_store_->SetSchema( + schema, + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // Put 11 docs of "TypeA" into the document store. 
DocumentProto doc = @@ -1492,7 +1495,10 @@ TEST_F(IntegerIndexTest, WildcardStoragePersistenceQuery) { .AddProperty(PropertyConfigBuilder(int_property_config) .SetName("desiredProperty"))) .Build(); - ICING_ASSERT_OK(this->schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(this->schema_store_->SetSchema( + schema, + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // Ids are assigned alphabetically, so the property ids are: // TypeA.desiredProperty = 0 @@ -1862,7 +1868,10 @@ TEST_F(IntegerIndexTest, WildcardStorageWorksAfterOptimize) { .AddProperty(PropertyConfigBuilder(int_property_config) .SetName("desiredProperty"))) .Build(); - ICING_ASSERT_OK(this->schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(this->schema_store_->SetSchema( + schema, + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // Ids are assigned alphabetically, so the property ids are: // TypeA.desiredProperty = 0 @@ -2145,7 +2154,10 @@ TEST_F(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) { .AddProperty(PropertyConfigBuilder(int_property_config) .SetName("undesiredProperty"))) .Build(); - ICING_ASSERT_OK(this->schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(this->schema_store_->SetSchema( + schema, + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // Ids are assigned alphabetically, so the property ids are: // TypeA.desiredProperty = 0 diff --git a/icing/index/string-section-indexing-handler.cc b/icing/index/string-section-indexing-handler.cc index a992568..69b8889 100644 --- a/icing/index/string-section-indexing-handler.cc +++ b/icing/index/string-section-indexing-handler.cc @@ -30,6 +30,8 @@ #include "icing/store/document-id.h" #include "icing/transform/normalizer.h" #include "icing/util/clock.h" +#include "icing/util/logging.h" +#include "icing/util/status-macros.h" #include "icing/util/tokenized-document.h" namespace icing { @@ 
-121,7 +123,8 @@ libtextclassifier3::Status StringSectionIndexingHandler::Handle( } if (put_document_stats != nullptr) { - // TODO(b/259744228): set term index latency. + put_document_stats->set_term_index_latency_ms( + index_timer->GetElapsedMilliseconds()); put_document_stats->mutable_tokenization_stats()->set_num_tokens_indexed( num_tokens); } diff --git a/icing/join/join-processor.cc b/icing/join/join-processor.cc index da0e5d2..d68ec98 100644 --- a/icing/join/join-processor.cc +++ b/icing/join/join-processor.cc @@ -15,20 +15,27 @@ #include "icing/join/join-processor.h" #include <algorithm> -#include <functional> +#include <memory> +#include <optional> #include <string> #include <string_view> +#include <unordered_map> +#include <utility> #include <vector> #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" #include "icing/join/aggregation-scorer.h" +#include "icing/join/doc-join-info.h" +#include "icing/join/join-children-fetcher.h" #include "icing/join/qualified-id.h" +#include "icing/proto/schema.pb.h" #include "icing/proto/scoring.pb.h" #include "icing/proto/search.pb.h" #include "icing/schema/joinable-property.h" #include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" #include "icing/util/status-macros.h" @@ -76,13 +83,7 @@ JoinProcessor::GetChildrenFetcher( continue; } - // Since we've already sorted child_scored_document_hits, just simply omit - // if the map_joinable_qualified_id[parent_doc_id].size() has reached max - // joined child count. 
- if (map_joinable_qualified_id[ref_doc_id].size() < - join_spec.max_joined_child_count()) { - map_joinable_qualified_id[ref_doc_id].push_back(child); - } + map_joinable_qualified_id[ref_doc_id].push_back(child); } return JoinChildrenFetcher(join_spec, std::move(map_joinable_qualified_id)); } diff --git a/icing/join/join-processor_test.cc b/icing/join/join-processor_test.cc index 67b6201..ec92349 100644 --- a/icing/join/join-processor_test.cc +++ b/icing/join/join-processor_test.cc @@ -16,15 +16,20 @@ #include <memory> #include <string> +#include <utility> #include <vector> +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/document-builder.h" #include "icing/file/filesystem.h" -#include "icing/join/qualified-id-joinable-property-indexing-handler.h" +#include "icing/file/portable-file-backed-proto-log.h" +#include "icing/join/qualified-id-join-indexing-handler.h" #include "icing/join/qualified-id-type-joinable-index.h" +#include "icing/portable/platform.h" #include "icing/proto/document.pb.h" +#include "icing/proto/document_wrapper.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/scoring.pb.h" #include "icing/proto/search.pb.h" @@ -33,6 +38,7 @@ #include "icing/schema/section.h" #include "icing/scoring/scored-document-hit.h" #include "icing/store/document-id.h" +#include "icing/store/document-store.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" #include "icing/testing/icu-data-file-helper.h" @@ -103,7 +109,10 @@ class JoinProcessorTest : public ::testing::Test { JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); ASSERT_THAT(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()), IsTrue()); @@ 
-140,9 +149,9 @@ class JoinProcessorTest : public ::testing::Test { document)); ICING_ASSIGN_OR_RETURN( - std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, - QualifiedIdJoinablePropertyIndexingHandler::Create( - &fake_clock_, qualified_id_join_index_.get())); + std::unique_ptr<QualifiedIdJoinIndexingHandler> handler, + QualifiedIdJoinIndexingHandler::Create(&fake_clock_, + qualified_id_join_index_.get())); ICING_RETURN_IF_ERROR(handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false, /*put_document_stats=*/nullptr)); @@ -244,7 +253,6 @@ TEST_F(JoinProcessorTest, JoinByQualifiedId) { scored_doc_hit5, scored_doc_hit4, scored_doc_hit3}; JoinSpecProto join_spec; - join_spec.set_max_joined_child_count(100); join_spec.set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec.set_child_property_expression("sender"); @@ -313,7 +321,6 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithoutJoiningProperty) { scored_doc_hit3}; JoinSpecProto join_spec; - join_spec.set_max_joined_child_count(100); join_spec.set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec.set_child_property_expression("sender"); @@ -394,7 +401,6 @@ TEST_F(JoinProcessorTest, ShouldIgnoreChildDocumentsWithInvalidQualifiedId) { scored_doc_hit2, scored_doc_hit3, scored_doc_hit4}; JoinSpecProto join_spec; - join_spec.set_max_joined_child_count(100); join_spec.set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec.set_child_property_expression("sender"); @@ -459,7 +465,6 @@ TEST_F(JoinProcessorTest, LeftJoinShouldReturnParentWithoutChildren) { std::vector<ScoredDocumentHit> child_scored_document_hits = {scored_doc_hit3}; JoinSpecProto join_spec; - join_spec.set_max_joined_child_count(100); join_spec.set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec.set_child_property_expression("sender"); @@ -541,7 +546,6 @@ TEST_F(JoinProcessorTest, 
ShouldSortChildDocumentsByRankingStrategy) { scored_doc_hit2, scored_doc_hit3, scored_doc_hit4}; JoinSpecProto join_spec; - join_spec.set_max_joined_child_count(100); join_spec.set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec.set_child_property_expression("sender"); @@ -564,114 +568,6 @@ TEST_F(JoinProcessorTest, ShouldSortChildDocumentsByRankingStrategy) { {scored_doc_hit3, scored_doc_hit4, scored_doc_hit2})))); } -TEST_F(JoinProcessorTest, - ShouldTruncateByRankingStrategyIfExceedingMaxJoinedChildCount) { - DocumentProto person1 = DocumentBuilder() - .SetKey("pkg$db/namespace", "person1") - .SetSchema("Person") - .AddStringProperty("Name", "Alice") - .Build(); - DocumentProto person2 = DocumentBuilder() - .SetKey(R"(pkg$db/name#space\\)", "person2") - .SetSchema("Person") - .AddStringProperty("Name", "Bob") - .Build(); - - DocumentProto email1 = - DocumentBuilder() - .SetKey("pkg$db/namespace", "email1") - .SetSchema("Email") - .AddStringProperty("subject", "test subject 1") - .AddStringProperty("sender", "pkg$db/namespace#person1") - .Build(); - DocumentProto email2 = - DocumentBuilder() - .SetKey("pkg$db/namespace", "email2") - .SetSchema("Email") - .AddStringProperty("subject", "test subject 2") - .AddStringProperty("sender", "pkg$db/namespace#person1") - .Build(); - DocumentProto email3 = - DocumentBuilder() - .SetKey("pkg$db/namespace", "email3") - .SetSchema("Email") - .AddStringProperty("subject", "test subject 3") - .AddStringProperty("sender", "pkg$db/namespace#person1") - .Build(); - DocumentProto email4 = - DocumentBuilder() - .SetKey("pkg$db/namespace", "email4") - .SetSchema("Email") - .AddStringProperty("subject", "test subject 4") - .AddStringProperty("sender", - R"(pkg$db/name\#space\\\\#person2)") // escaped - .Build(); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - PutAndIndexDocument(person1)); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - PutAndIndexDocument(person2)); - 
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - PutAndIndexDocument(email1)); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4, - PutAndIndexDocument(email2)); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5, - PutAndIndexDocument(email3)); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6, - PutAndIndexDocument(email4)); - - ScoredDocumentHit scored_doc_hit1(document_id1, kSectionIdMaskNone, - /*score=*/0.0); - ScoredDocumentHit scored_doc_hit2(document_id2, kSectionIdMaskNone, - /*score=*/0.0); - ScoredDocumentHit scored_doc_hit3(document_id3, kSectionIdMaskNone, - /*score=*/2.0); - ScoredDocumentHit scored_doc_hit4(document_id4, kSectionIdMaskNone, - /*score=*/5.0); - ScoredDocumentHit scored_doc_hit5(document_id5, kSectionIdMaskNone, - /*score=*/3.0); - ScoredDocumentHit scored_doc_hit6(document_id6, kSectionIdMaskNone, - /*score=*/1.0); - - // Parent ScoredDocumentHits: all Person documents - std::vector<ScoredDocumentHit> parent_scored_document_hits = { - scored_doc_hit1, scored_doc_hit2}; - - // Child ScoredDocumentHits: all Email documents - std::vector<ScoredDocumentHit> child_scored_document_hits = { - scored_doc_hit3, scored_doc_hit4, scored_doc_hit5, scored_doc_hit6}; - - JoinSpecProto join_spec; - join_spec.set_max_joined_child_count(2); - join_spec.set_parent_property_expression( - std::string(JoinProcessor::kQualifiedIdExpr)); - join_spec.set_child_property_expression("sender"); - join_spec.set_aggregation_scoring_strategy( - JoinSpecProto::AggregationScoringStrategy::COUNT); - join_spec.mutable_nested_spec()->mutable_scoring_spec()->set_order_by( - ScoringSpecProto::Order::DESC); - - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<JoinedScoredDocumentHit> joined_result_document_hits, - Join(join_spec, std::move(parent_scored_document_hits), - std::move(child_scored_document_hits))); - // Since we set max_joind_child_count as 2 and use DESC as the (nested) - // ranking strategy, parent document with # of child documents more than 2 - // 
should only keep 2 child documents with higher scores and the rest should - // be truncated. - EXPECT_THAT( - joined_result_document_hits, - ElementsAre(EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit( - /*final_score=*/2.0, - /*parent_scored_document_hit=*/scored_doc_hit1, - /*child_scored_document_hits=*/ - {scored_doc_hit4, scored_doc_hit5})), - EqualsJoinedScoredDocumentHit(JoinedScoredDocumentHit( - /*final_score=*/1.0, - /*parent_scored_document_hit=*/scored_doc_hit2, - /*child_scored_document_hits=*/{scored_doc_hit6})))); -} - TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) { DocumentProto email1 = DocumentBuilder() @@ -695,7 +591,6 @@ TEST_F(JoinProcessorTest, ShouldAllowSelfJoining) { std::vector<ScoredDocumentHit> child_scored_document_hits = {scored_doc_hit1}; JoinSpecProto join_spec; - join_spec.set_max_joined_child_count(100); join_spec.set_parent_property_expression( std::string(JoinProcessor::kQualifiedIdExpr)); join_spec.set_child_property_expression("sender"); diff --git a/icing/join/qualified-id-joinable-property-indexing-handler.cc b/icing/join/qualified-id-join-indexing-handler.cc index 150b23b..86af043 100644 --- a/icing/join/qualified-id-joinable-property-indexing-handler.cc +++ b/icing/join/qualified-id-join-indexing-handler.cc @@ -12,42 +12,44 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "icing/join/qualified-id-joinable-property-indexing-handler.h" +#include "icing/join/qualified-id-join-indexing-handler.h" #include <memory> #include <string_view> #include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/join/doc-join-info.h" #include "icing/join/qualified-id-type-joinable-index.h" #include "icing/join/qualified-id.h" #include "icing/legacy/core/icing-string-util.h" #include "icing/proto/logging.pb.h" +#include "icing/schema/joinable-property.h" #include "icing/store/document-id.h" +#include "icing/util/clock.h" #include "icing/util/logging.h" +#include "icing/util/status-macros.h" #include "icing/util/tokenized-document.h" namespace icing { namespace lib { /* static */ libtextclassifier3::StatusOr< - std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>> -QualifiedIdJoinablePropertyIndexingHandler::Create( + std::unique_ptr<QualifiedIdJoinIndexingHandler>> +QualifiedIdJoinIndexingHandler::Create( const Clock* clock, QualifiedIdTypeJoinableIndex* qualified_id_join_index) { ICING_RETURN_ERROR_IF_NULL(clock); ICING_RETURN_ERROR_IF_NULL(qualified_id_join_index); - return std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>( - new QualifiedIdJoinablePropertyIndexingHandler(clock, - qualified_id_join_index)); + return std::unique_ptr<QualifiedIdJoinIndexingHandler>( + new QualifiedIdJoinIndexingHandler(clock, qualified_id_join_index)); } -libtextclassifier3::Status QualifiedIdJoinablePropertyIndexingHandler::Handle( +libtextclassifier3::Status QualifiedIdJoinIndexingHandler::Handle( const TokenizedDocument& tokenized_document, DocumentId document_id, bool recovery_mode, PutDocumentStatsProto* put_document_stats) { - // TODO(b/263890397): set qualified id join index processing latency and other - // stats. 
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer(); if (!IsDocumentIdValid(document_id)) { return absl_ports::InvalidArgumentError( @@ -94,6 +96,11 @@ libtextclassifier3::Status QualifiedIdJoinablePropertyIndexingHandler::Handle( } } + if (put_document_stats != nullptr) { + put_document_stats->set_qualified_id_join_index_latency_ms( + index_timer->GetElapsedMilliseconds()); + } + return libtextclassifier3::Status::OK; } diff --git a/icing/join/qualified-id-joinable-property-indexing-handler.h b/icing/join/qualified-id-join-indexing-handler.h index 0265874..434403e 100644 --- a/icing/join/qualified-id-joinable-property-indexing-handler.h +++ b/icing/join/qualified-id-join-indexing-handler.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef ICING_JOIN_QUALIFIED_ID_JOINABLE_PROPERTY_INDEXING_HANDLER_H_ -#define ICING_JOIN_QUALIFIED_ID_JOINABLE_PROPERTY_INDEXING_HANDLER_H_ +#ifndef ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_ +#define ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_ #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/index/data-indexing-handler.h" @@ -26,22 +26,21 @@ namespace icing { namespace lib { -class QualifiedIdJoinablePropertyIndexingHandler : public DataIndexingHandler { +class QualifiedIdJoinIndexingHandler : public DataIndexingHandler { public: - // Creates a QualifiedIdJoinablePropertyIndexingHandler instance which does - // not take ownership of any input components. All pointers must refer to - // valid objects that outlive the created - // QualifiedIdJoinablePropertyIndexingHandler instance. + // Creates a QualifiedIdJoinIndexingHandler instance which does not take + // ownership of any input components. All pointers must refer to valid objects + // that outlive the created QualifiedIdJoinIndexingHandler instance. 
// // Returns: - // - A QualifiedIdJoinablePropertyIndexingHandler instance on success + // - A QualifiedIdJoinIndexingHandler instance on success // - FAILED_PRECONDITION_ERROR if any of the input pointer is null static libtextclassifier3::StatusOr< - std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler>> + std::unique_ptr<QualifiedIdJoinIndexingHandler>> Create(const Clock* clock, QualifiedIdTypeJoinableIndex* qualified_id_join_index); - ~QualifiedIdJoinablePropertyIndexingHandler() override = default; + ~QualifiedIdJoinIndexingHandler() override = default; // Handles the joinable qualified id data indexing process: add data into the // qualified id type joinable cache. @@ -58,7 +57,7 @@ class QualifiedIdJoinablePropertyIndexingHandler : public DataIndexingHandler { bool recovery_mode, PutDocumentStatsProto* put_document_stats) override; private: - explicit QualifiedIdJoinablePropertyIndexingHandler( + explicit QualifiedIdJoinIndexingHandler( const Clock* clock, QualifiedIdTypeJoinableIndex* qualified_id_join_index) : DataIndexingHandler(clock), qualified_id_join_index_(*qualified_id_join_index) {} @@ -69,4 +68,4 @@ class QualifiedIdJoinablePropertyIndexingHandler : public DataIndexingHandler { } // namespace lib } // namespace icing -#endif // ICING_JOIN_QUALIFIED_ID_JOINABLE_PROPERTY_INDEXING_HANDLER_H_ +#endif // ICING_JOIN_QUALIFIED_ID_JOIN_INDEXING_HANDLER_H_ diff --git a/icing/join/qualified-id-joinable-property-indexing-handler_test.cc b/icing/join/qualified-id-join-indexing-handler_test.cc index 846520e..daddc4c 100644 --- a/icing/join/qualified-id-joinable-property-indexing-handler_test.cc +++ b/icing/join/qualified-id-join-indexing-handler_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "icing/join/qualified-id-joinable-property-indexing-handler.h" +#include "icing/join/qualified-id-join-indexing-handler.h" #include <memory> #include <string> @@ -73,7 +73,7 @@ static constexpr JoinablePropertyId kQualifiedId2JoinablePropertyId = 1; static constexpr DocumentId kDefaultDocumentId = 3; -class QualifiedIdJoinablePropertyIndexingHandlerTest : public ::testing::Test { +class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test { protected: void SetUp() override { if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { @@ -135,7 +135,9 @@ class QualifiedIdJoinablePropertyIndexingHandlerTest : public ::testing::Test { JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - ICING_ASSERT_OK(schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); } void TearDown() override { @@ -157,18 +159,17 @@ class QualifiedIdJoinablePropertyIndexingHandlerTest : public ::testing::Test { std::unique_ptr<SchemaStore> schema_store_; }; -TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, - CreationWithNullPointerShouldFail) { - EXPECT_THAT(QualifiedIdJoinablePropertyIndexingHandler::Create( +TEST_F(QualifiedIdJoinIndexingHandlerTest, CreationWithNullPointerShouldFail) { + EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create( /*clock=*/nullptr, qualified_id_join_index_.get()), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT(QualifiedIdJoinablePropertyIndexingHandler::Create( + EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create( &fake_clock_, /*qualified_id_join_index=*/nullptr), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } -TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, HandleJoinableProperty) { +TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleJoinableProperty) { DocumentProto referenced_document = DocumentBuilder() 
.SetKey("pkg$db/ns", "ref_type/1") @@ -192,9 +193,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, HandleJoinableProperty) { Eq(kInvalidDocumentId)); // Handle document. ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, - QualifiedIdJoinablePropertyIndexingHandler::Create( - &fake_clock_, qualified_id_join_index_.get())); + std::unique_ptr<QualifiedIdJoinIndexingHandler> handler, + QualifiedIdJoinIndexingHandler::Create(&fake_clock_, + qualified_id_join_index_.get())); EXPECT_THAT( handler->Handle(tokenized_document, kDefaultDocumentId, /*recovery_mode=*/false, /*put_document_stats=*/nullptr), @@ -207,8 +208,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, HandleJoinableProperty) { IsOkAndHolds("pkg$db/ns#ref_type/1")); } -TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, - HandleNestedJoinableProperty) { +TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleNestedJoinableProperty) { DocumentProto referenced_document1 = DocumentBuilder() .SetKey("pkg$db/ns", "ref_type/1") @@ -246,9 +246,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, Eq(kInvalidDocumentId)); // Handle nested_document. 
ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, - QualifiedIdJoinablePropertyIndexingHandler::Create( - &fake_clock_, qualified_id_join_index_.get())); + std::unique_ptr<QualifiedIdJoinIndexingHandler> handler, + QualifiedIdJoinIndexingHandler::Create(&fake_clock_, + qualified_id_join_index_.get())); EXPECT_THAT(handler->Handle(tokenized_document, kDefaultDocumentId, /*recovery_mode=*/false, /*put_document_stats=*/nullptr), @@ -264,7 +264,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, IsOkAndHolds("pkg$db/ns#ref_type/1")); } -TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, +TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleShouldSkipInvalidFormatQualifiedId) { static constexpr std::string_view kInvalidFormatQualifiedId = "invalid_format_qualified_id"; @@ -289,9 +289,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, // Index data should remain unchanged since there is no valid qualified id, // but last_added_document_id should be updated. ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, - QualifiedIdJoinablePropertyIndexingHandler::Create( - &fake_clock_, qualified_id_join_index_.get())); + std::unique_ptr<QualifiedIdJoinIndexingHandler> handler, + QualifiedIdJoinIndexingHandler::Create(&fake_clock_, + qualified_id_join_index_.get())); EXPECT_THAT( handler->Handle(tokenized_document, kDefaultDocumentId, /*recovery_mode=*/false, /*put_document_stats=*/nullptr), @@ -303,8 +303,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, - HandleShouldSkipEmptyQualifiedId) { +TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleShouldSkipEmptyQualifiedId) { // Create a document without any qualified id. 
DocumentProto document = DocumentBuilder() .SetKey("icing", "fake_type/1") @@ -321,9 +320,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, // Handle document. Index data should remain unchanged since there is no // qualified id, but last_added_document_id should be updated. ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, - QualifiedIdJoinablePropertyIndexingHandler::Create( - &fake_clock_, qualified_id_join_index_.get())); + std::unique_ptr<QualifiedIdJoinIndexingHandler> handler, + QualifiedIdJoinIndexingHandler::Create(&fake_clock_, + qualified_id_join_index_.get())); EXPECT_THAT( handler->Handle(tokenized_document, kDefaultDocumentId, /*recovery_mode=*/false, /*put_document_stats=*/nullptr), @@ -335,7 +334,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, +TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleInvalidDocumentIdShouldReturnInvalidArgumentError) { DocumentProto referenced_document = DocumentBuilder() @@ -361,9 +360,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, Eq(kDefaultDocumentId)); ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, - QualifiedIdJoinablePropertyIndexingHandler::Create( - &fake_clock_, qualified_id_join_index_.get())); + std::unique_ptr<QualifiedIdJoinIndexingHandler> handler, + QualifiedIdJoinIndexingHandler::Create(&fake_clock_, + qualified_id_join_index_.get())); // Handling document with kInvalidDocumentId should cause a failure, and both // index data and last_added_document_id should remain unchanged. 
@@ -389,7 +388,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, +TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) { DocumentProto referenced_document = DocumentBuilder() @@ -415,9 +414,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, Eq(kDefaultDocumentId)); ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, - QualifiedIdJoinablePropertyIndexingHandler::Create( - &fake_clock_, qualified_id_join_index_.get())); + std::unique_ptr<QualifiedIdJoinIndexingHandler> handler, + QualifiedIdJoinIndexingHandler::Create(&fake_clock_, + qualified_id_join_index_.get())); // Handling document with document_id < last_added_document_id should cause a // failure, and both index data and last_added_document_id should remain @@ -447,7 +446,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, +TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleRecoveryModeShouldIgnoreDocsLELastAddedDocId) { DocumentProto referenced_document = DocumentBuilder() @@ -473,9 +472,9 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, Eq(kDefaultDocumentId)); ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, - QualifiedIdJoinablePropertyIndexingHandler::Create( - &fake_clock_, qualified_id_join_index_.get())); + std::unique_ptr<QualifiedIdJoinIndexingHandler> handler, + QualifiedIdJoinIndexingHandler::Create(&fake_clock_, + qualified_id_join_index_.get())); // Handle document with document_id < last_added_document_id in recovery mode. 
// We should not get any error, but the handler should ignore the document, so diff --git a/icing/query/advanced_query_parser/query-visitor.cc b/icing/query/advanced_query_parser/query-visitor.cc index c2cee47..664b072 100644 --- a/icing/query/advanced_query_parser/query-visitor.cc +++ b/icing/query/advanced_query_parser/query-visitor.cc @@ -33,6 +33,7 @@ #include "icing/index/iterator/doc-hit-info-iterator-none.h" #include "icing/index/iterator/doc-hit-info-iterator-not.h" #include "icing/index/iterator/doc-hit-info-iterator-or.h" +#include "icing/index/iterator/doc-hit-info-iterator-property-in-schema.h" #include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/query/advanced_query_parser/lexer.h" @@ -224,7 +225,7 @@ void QueryVisitor::RegisterFunctions() { Function property_defined_function = Function::Create(DataType::kDocumentIterator, "propertyDefined", - {Param(DataType::kText)}, std::move(property_defined)) + {Param(DataType::kString)}, std::move(property_defined)) .ValueOrDie(); registered_functions_.insert( {property_defined_function.name(), std::move(property_defined_function)}); @@ -301,20 +302,23 @@ libtextclassifier3::StatusOr<PendingValue> QueryVisitor::SearchFunction( libtextclassifier3::StatusOr<PendingValue> QueryVisitor::PropertyDefinedFunction(std::vector<PendingValue>&& args) { - // The first arg is guaranteed to be a TEXT at this point. It should be safe + // The first arg is guaranteed to be a STRING at this point. It should be safe // to call ValueOrDie. + const QueryTerm* member = args.at(0).string_val().ValueOrDie(); - // TODO(b/268680462): Consume this and implement the actual iterator. 
- // const QueryTerm* member = - args.at(0).text_val().ValueOrDie(); - - std::unique_ptr<DocHitInfoIterator> iterator = + std::unique_ptr<DocHitInfoIterator> all_docs_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>( document_store_.last_added_document_id()); - features_.insert(kPropertyDefinedInSchemaCustomFunctionFeature); + std::set<std::string> target_sections = {std::move(member->term)}; + std::unique_ptr<DocHitInfoIterator> property_in_schema_iterator = + std::make_unique<DocHitInfoIteratorPropertyInSchema>( + std::move(all_docs_iterator), &document_store_, &schema_store_, + std::move(target_sections)); - return PendingValue(std::move(iterator)); + features_.insert(kListFilterQueryLanguageFeature); + + return PendingValue(std::move(property_in_schema_iterator)); } libtextclassifier3::StatusOr<int64_t> QueryVisitor::PopPendingIntValue() { @@ -362,10 +366,8 @@ QueryVisitor::PopPendingIterator() { return CreateTermIterator(std::move(string_value)); } else { ICING_ASSIGN_OR_RETURN(QueryTerm text_value, PopPendingTextValue()); - ICING_ASSIGN_OR_RETURN( - std::unique_ptr<Tokenizer::Iterator> token_itr, - tokenizer_.Tokenize(text_value.term, - LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> token_itr, + tokenizer_.Tokenize(text_value.term)); std::string normalized_term; std::vector<std::unique_ptr<DocHitInfoIterator>> iterators; // The tokenizer will produce 1+ tokens out of the text. 
The prefix operator diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc index c48d9ad..92eb3e7 100644 --- a/icing/query/advanced_query_parser/query-visitor_test.cc +++ b/icing/query/advanced_query_parser/query-visitor_test.cc @@ -792,7 +792,9 @@ TEST_P(QueryVisitorTest, NumericComparatorDoesntAffectLaterTerms) { ICING_ASSERT_OK(schema_store_->SetSchema( SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("type")) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // Index three documents: // - Doc0: ["-2", "-1", "1", "2"] and [-2, -1, 1, 2] @@ -1543,7 +1545,9 @@ TEST_P(QueryVisitorTest, SingleMinusTerm) { ICING_ASSERT_OK(schema_store_->SetSchema( SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("type")) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK(document_store_->Put( DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build())); @@ -1595,7 +1599,9 @@ TEST_P(QueryVisitorTest, SingleNotTerm) { ICING_ASSERT_OK(schema_store_->SetSchema( SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("type")) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK(document_store_->Put( DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build())); @@ -1643,7 +1649,9 @@ TEST_P(QueryVisitorTest, NestedNotTerms) { ICING_ASSERT_OK(schema_store_->SetSchema( SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("type")) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK(document_store_->Put( DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build())); @@ -1699,7 +1707,9 @@ TEST_P(QueryVisitorTest, DeeplyNestedNotTerms) { 
ICING_ASSERT_OK(schema_store_->SetSchema( SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("type")) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK(document_store_->Put( DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build())); @@ -1991,7 +2001,9 @@ TEST_P(QueryVisitorTest, AndOrNotPrecedence) { .SetName("prop1") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK(document_store_->Put( DocumentBuilder().SetKey("ns", "uri0").SetSchema("type").Build())); @@ -2073,7 +2085,10 @@ TEST_P(QueryVisitorTest, PropertyFilter) { .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + // Section ids are assigned alphabetically. SectionId prop1_section_id = 0; SectionId prop2_section_id = 1; @@ -2145,7 +2160,10 @@ TEST_F(QueryVisitorTest, MultiPropertyFilter) { .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + // Section ids are assigned alphabetically. SectionId prop1_section_id = 0; SectionId prop2_section_id = 1; @@ -2210,7 +2228,9 @@ TEST_P(QueryVisitorTest, PropertyFilterStringIsInvalid) { .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // "prop1" is a STRING token, which cannot be a property name. 
std::string query = CreateQuery(R"(("prop1":foo))"); @@ -2241,7 +2261,9 @@ TEST_P(QueryVisitorTest, PropertyFilterNonNormalized) { .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // Section ids are assigned alphabetically. SectionId prop1_section_id = 0; SectionId prop2_section_id = 1; @@ -2308,7 +2330,10 @@ TEST_P(QueryVisitorTest, PropertyFilterWithGrouping) { .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + // Section ids are assigned alphabetically. SectionId prop1_section_id = 0; SectionId prop2_section_id = 1; @@ -2373,7 +2398,10 @@ TEST_P(QueryVisitorTest, ValidNestedPropertyFilter) { .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + // Section ids are assigned alphabetically. SectionId prop1_section_id = 0; SectionId prop2_section_id = 1; @@ -2457,7 +2485,10 @@ TEST_P(QueryVisitorTest, InvalidNestedPropertyFilter) { .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + // Section ids are assigned alphabetically. 
SectionId prop1_section_id = 0; SectionId prop2_section_id = 1; @@ -2537,7 +2568,10 @@ TEST_P(QueryVisitorTest, NotWithPropertyFilter) { .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + // Section ids are assigned alphabetically. SectionId prop1_section_id = 0; SectionId prop2_section_id = 1; @@ -2622,7 +2656,10 @@ TEST_P(QueryVisitorTest, PropertyFilterWithNot) { .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + // Section ids are assigned alphabetically. SectionId prop1_section_id = 0; SectionId prop2_section_id = 1; @@ -2713,7 +2750,10 @@ TEST_P(QueryVisitorTest, SegmentationTest) { .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + // Section ids are assigned alphabetically. 
SectionId prop1_section_id = 0; SectionId prop2_section_id = 1; @@ -2803,7 +2843,9 @@ TEST_P(QueryVisitorTest, PropertyRestrictsPopCorrectly) { .AddProperty(prop) .AddProperty(PropertyConfigBuilder(prop).SetName("prop1")) .AddProperty(PropertyConfigBuilder(prop).SetName("prop2"))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); SectionId prop0_id = 0; SectionId prop1_id = 1; @@ -2916,7 +2958,9 @@ TEST_P(QueryVisitorTest, UnsatisfiablePropertyRestrictsPopCorrectly) { .AddProperty(prop) .AddProperty(PropertyConfigBuilder(prop).SetName("prop1")) .AddProperty(PropertyConfigBuilder(prop).SetName("prop2"))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); SectionId prop0_id = 0; SectionId prop1_id = 1; @@ -3139,7 +3183,10 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedFunctionCalls) { .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + // Section ids are assigned alphabetically. SectionId prop1_section_id = 0; @@ -3265,7 +3312,10 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsNarrowing) { .AddProperty(PropertyConfigBuilder(prop).SetName("prop5")) .AddProperty(PropertyConfigBuilder(prop).SetName("prop6")) .AddProperty(PropertyConfigBuilder(prop).SetName("prop7"))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + // Section ids are assigned alphabetically. 
SectionId prop0_id = 0; SectionId prop1_id = 1; @@ -3442,7 +3492,10 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsExpanding) { .AddProperty(PropertyConfigBuilder(prop).SetName("prop5")) .AddProperty(PropertyConfigBuilder(prop).SetName("prop6")) .AddProperty(PropertyConfigBuilder(prop).SetName("prop7"))) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + // Section ids are assigned alphabetically. SectionId prop0_id = 0; SectionId prop1_id = 1; @@ -3606,7 +3659,7 @@ TEST_F(QueryVisitorTest, TEST_F( QueryVisitorTest, PropertyDefinedFunctionWithMoreThanOneTextArgumentReturnsInvalidArgument) { - std::string query = "propertyDefined(foo, bar)"; + std::string query = "propertyDefined(\"foo\", \"bar\")"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node, ParseQueryHelper(query)); QueryVisitor query_visitor( @@ -3620,9 +3673,9 @@ TEST_F( } TEST_F(QueryVisitorTest, - PropertyDefinedFunctionWithStringArgumentReturnsInvalidArgument) { - // The argument type is STRING, not TEXT here. - std::string query = "propertyDefined(\"foo\")"; + PropertyDefinedFunctionWithTextArgumentReturnsInvalidArgument) { + // The argument type is TEXT, not STRING here. + std::string query = "propertyDefined(foo)"; ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node, ParseQueryHelper(query)); QueryVisitor query_visitor( @@ -3650,7 +3703,7 @@ TEST_F(QueryVisitorTest, StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_P(QueryVisitorTest, PropertyDefinedFunctionCurrentlyReturnsEverything) { +TEST_P(QueryVisitorTest, PropertyDefinedFunctionReturnsMatchingDocuments) { // Set up two schemas, one with a "url" field and one without. 
ICING_ASSERT_OK(schema_store_->SetSchema( SchemaBuilder() @@ -3661,8 +3714,11 @@ TEST_P(QueryVisitorTest, PropertyDefinedFunctionCurrentlyReturnsEverything) { .SetDataType(TYPE_STRING) .SetCardinality(CARDINALITY_OPTIONAL))) .AddType(SchemaTypeConfigBuilder().SetType("typeWithoutUrl")) - .Build())); + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + // Document 0 has the term "foo" and its schema has the url property. ICING_ASSERT_OK(document_store_->Put( DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build())); Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1, @@ -3670,6 +3726,7 @@ TEST_P(QueryVisitorTest, PropertyDefinedFunctionCurrentlyReturnsEverything) { editor.BufferTerm("foo"); editor.IndexAllBufferedTerms(); + // Document 1 has the term "foo" and its schema DOESN'T have the url property. ICING_ASSERT_OK(document_store_->Put(DocumentBuilder() .SetKey("ns", "uri1") .SetSchema("typeWithoutUrl") @@ -3679,6 +3736,7 @@ TEST_P(QueryVisitorTest, PropertyDefinedFunctionCurrentlyReturnsEverything) { editor.BufferTerm("foo"); editor.IndexAllBufferedTerms(); + // Document 2 has the term "bar" and its schema has the url property. 
ICING_ASSERT_OK(document_store_->Put( DocumentBuilder().SetKey("ns", "uri2").SetSchema("typeWithUrl").Build())); editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX, @@ -3686,7 +3744,60 @@ TEST_P(QueryVisitorTest, PropertyDefinedFunctionCurrentlyReturnsEverything) { editor.BufferTerm("bar"); editor.IndexAllBufferedTerms(); - std::string query = CreateQuery("foo propertyDefined(url)"); + std::string query = CreateQuery("foo propertyDefined(\"url\")"); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node, + ParseQueryHelper(query)); + QueryVisitor query_visitor( + index_.get(), numeric_index_.get(), document_store_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), query, + DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, + /*needs_term_frequency_info_=*/true); + root_node->Accept(&query_visitor); + ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results, + std::move(query_visitor).ConsumeResults()); + EXPECT_THAT(query_results.features_in_use, + UnorderedElementsAre(kListFilterQueryLanguageFeature)); + + EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), + UnorderedElementsAre(kDocumentId0)); +} + +TEST_P(QueryVisitorTest, + PropertyDefinedFunctionReturnsNothingIfNoMatchingProperties) { + // Set up two schemas, one with a "url" field and one without. + ICING_ASSERT_OK(schema_store_->SetSchema( + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("typeWithUrl") + .AddProperty(PropertyConfigBuilder() + .SetName("url") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("typeWithoutUrl")) + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + // Document 0 has the term "foo" and its schema has the url property. 
+ ICING_ASSERT_OK(document_store_->Put( + DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build())); + Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1, + TERM_MATCH_PREFIX, /*namespace_id=*/0); + editor.BufferTerm("foo"); + editor.IndexAllBufferedTerms(); + + // Document 1 has the term "foo" and its schema DOESN'T have the url property. + ICING_ASSERT_OK(document_store_->Put(DocumentBuilder() + .SetKey("ns", "uri1") + .SetSchema("typeWithoutUrl") + .Build())); + editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX, + /*namespace_id=*/0); + editor.BufferTerm("foo"); + editor.IndexAllBufferedTerms(); + + // Attempt to query a non-existent property. + std::string query = CreateQuery("propertyDefined(\"nonexistentproperty\")"); ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node, ParseQueryHelper(query)); QueryVisitor query_visitor( @@ -3697,14 +3808,62 @@ TEST_P(QueryVisitorTest, PropertyDefinedFunctionCurrentlyReturnsEverything) { root_node->Accept(&query_visitor); ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results, std::move(query_visitor).ConsumeResults()); - EXPECT_THAT( - query_results.features_in_use, - UnorderedElementsAre(kPropertyDefinedInSchemaCustomFunctionFeature, - kListFilterQueryLanguageFeature)); + EXPECT_THAT(query_results.features_in_use, + UnorderedElementsAre(kListFilterQueryLanguageFeature)); + + EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), IsEmpty()); +} + +TEST_P(QueryVisitorTest, + PropertyDefinedFunctionWithNegationMatchesDocsWithNoSuchProperty) { + // Set up two schemas, one with a "url" field and one without. 
+ ICING_ASSERT_OK(schema_store_->SetSchema( + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("typeWithUrl") + .AddProperty(PropertyConfigBuilder() + .SetName("url") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("typeWithoutUrl")) + .Build(), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + // Document 0 has the term "foo" and its schema has the url property. + ICING_ASSERT_OK(document_store_->Put( + DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build())); + Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1, + TERM_MATCH_PREFIX, /*namespace_id=*/0); + editor.BufferTerm("foo"); + editor.IndexAllBufferedTerms(); + + // Document 1 has the term "foo" and its schema DOESN'T have the url property. + ICING_ASSERT_OK(document_store_->Put(DocumentBuilder() + .SetKey("ns", "uri1") + .SetSchema("typeWithoutUrl") + .Build())); + editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX, + /*namespace_id=*/0); + editor.BufferTerm("foo"); + editor.IndexAllBufferedTerms(); + + std::string query = CreateQuery("foo AND NOT propertyDefined(\"url\")"); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node, + ParseQueryHelper(query)); + QueryVisitor query_visitor( + index_.get(), numeric_index_.get(), document_store_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), query, + DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, + /*needs_term_frequency_info_=*/true); + root_node->Accept(&query_visitor); + ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results, + std::move(query_visitor).ConsumeResults()); + EXPECT_THAT(query_results.features_in_use, + UnorderedElementsAre(kListFilterQueryLanguageFeature)); - // TODO(b/268680462): Update once the feature is actually implemented. 
EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), - UnorderedElementsAre(kDocumentId0, kDocumentId1)); + UnorderedElementsAre(kDocumentId1)); } INSTANTIATE_TEST_SUITE_P(QueryVisitorTest, QueryVisitorTest, diff --git a/icing/query/query-features.h b/icing/query/query-features.h index 6e4fb94..158e13e 100644 --- a/icing/query/query-features.h +++ b/icing/query/query-features.h @@ -44,20 +44,13 @@ constexpr Feature kVerbatimSearchFeature = // - expanding support for negation and property restriction expressions // - prefix operator '*' // - 'NOT' operator +// - propertyDefined("url") constexpr Feature kListFilterQueryLanguageFeature = "LIST_FILTER_QUERY_LANGUAGE"; // Features#LIST_FILTER_QUERY_LANGUAGE -// This feature enables the custom function hasPropertyDefined(member). For -// example, a query "hasPropertyDefined(url)" will only return documents whose -// schemas have defined a "url" property. -// TODO(b/268680462): Update Features.java to sync with this Feature. -constexpr Feature kPropertyDefinedInSchemaCustomFunctionFeature = - "PROPERTY_DEFINED_IN_SCHEMA"; // Features#PROPERTY_DEFINED_IN_SCHEMA - inline std::unordered_set<Feature> GetQueryFeaturesSet() { return {kNumericSearchFeature, kVerbatimSearchFeature, - kListFilterQueryLanguageFeature, - kPropertyDefinedInSchemaCustomFunctionFeature}; + kListFilterQueryLanguageFeature}; } } // namespace lib diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc index 6760fad..c9704fe 100644 --- a/icing/query/query-processor.cc +++ b/icing/query/query-processor.cc @@ -346,7 +346,6 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseRawQuery( break; } case Token::Type::INVALID: - ICING_LOG(ERROR) << "INVALID"; [[fallthrough]]; default: // This wouldn't happen if tokenizer and query processor both work diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc index 6826c22..3596082 100644 --- a/icing/query/query-processor_benchmark.cc 
+++ b/icing/query/query-processor_benchmark.cc @@ -144,7 +144,9 @@ void BM_QueryOneTerm(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem, schema_dir, &clock)); - ICING_ASSERT_OK(schema_store->SetSchema(schema)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); DocumentStore::CreateResult create_result = CreateDocumentStore(&filesystem, doc_store_dir, &clock, @@ -270,7 +272,9 @@ void BM_QueryFiveTerms(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem, schema_dir, &clock)); - ICING_ASSERT_OK(schema_store->SetSchema(schema)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); DocumentStore::CreateResult create_result = CreateDocumentStore(&filesystem, doc_store_dir, &clock, @@ -414,7 +418,9 @@ void BM_QueryDiacriticTerm(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem, schema_dir, &clock)); - ICING_ASSERT_OK(schema_store->SetSchema(schema)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); DocumentStore::CreateResult create_result = CreateDocumentStore(&filesystem, doc_store_dir, &clock, @@ -543,7 +549,9 @@ void BM_QueryHiragana(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem, schema_dir, &clock)); - ICING_ASSERT_OK(schema_store->SetSchema(schema)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); DocumentStore::CreateResult create_result = 
CreateDocumentStore(&filesystem, doc_store_dir, &clock, diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc index 47245fd..be20b04 100644 --- a/icing/query/query-processor_test.cc +++ b/icing/query/query-processor_test.cc @@ -220,7 +220,10 @@ TEST_P(QueryProcessorTest, EmptyGroupMatchAllDocuments) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, document_store_->Put(DocumentBuilder() @@ -264,7 +267,10 @@ TEST_P(QueryProcessorTest, EmptyQueryMatchAllDocuments) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, document_store_->Put(DocumentBuilder() @@ -300,7 +306,10 @@ TEST_P(QueryProcessorTest, QueryTermNormalized) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // inserting the documents to get the appropriate number of documents and @@ -357,7 +366,10 @@ TEST_P(QueryProcessorTest, OneTermPrefixMatch) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -409,7 +421,10 @@ TEST_P(QueryProcessorTest, OneTermPrefixMatchWithMaxSectionID) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -463,7 +478,10 @@ TEST_P(QueryProcessorTest, OneTermExactMatch) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // inserting the documents to get the appropriate number of documents and @@ -515,7 +533,10 @@ TEST_P(QueryProcessorTest, AndSameTermExactMatch) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // just inserting the documents so that the DocHitInfoIterators will see @@ -569,7 +590,10 @@ TEST_P(QueryProcessorTest, AndTwoTermExactMatch) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // just inserting the documents so that the DocHitInfoIterators will see @@ -626,7 +650,10 @@ TEST_P(QueryProcessorTest, AndSameTermPrefixMatch) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // just inserting the documents so that the DocHitInfoIterators will see @@ -680,7 +707,10 @@ TEST_P(QueryProcessorTest, AndTwoTermPrefixMatch) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // just inserting the documents so that the DocHitInfoIterators will see @@ -738,7 +768,10 @@ TEST_P(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // just inserting the documents so that the DocHitInfoIterators will see @@ -796,7 +829,10 @@ TEST_P(QueryProcessorTest, OrTwoTermExactMatch) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // just inserting the documents so that the DocHitInfoIterators will see @@ -867,7 +903,10 @@ TEST_P(QueryProcessorTest, OrTwoTermPrefixMatch) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // just inserting the documents so that the DocHitInfoIterators will see @@ -938,7 +977,10 @@ TEST_P(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // just inserting the documents so that the DocHitInfoIterators will see @@ -1007,7 +1049,10 @@ TEST_P(QueryProcessorTest, CombinedAndOrTerms) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // just inserting the documents so that the DocHitInfoIterators will see @@ -1175,7 +1220,10 @@ TEST_P(QueryProcessorTest, OneGroup) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // just inserting the documents so that the DocHitInfoIterators will see @@ -1238,7 +1286,10 @@ TEST_P(QueryProcessorTest, TwoGroups) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // just inserting the documents so that the DocHitInfoIterators will see @@ -1304,7 +1355,10 @@ TEST_P(QueryProcessorTest, ManyLevelNestedGrouping) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // just inserting the documents so that the DocHitInfoIterators will see @@ -1367,7 +1421,10 @@ TEST_P(QueryProcessorTest, OneLevelNestedGrouping) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // just inserting the documents so that the DocHitInfoIterators will see @@ -1432,7 +1489,10 @@ TEST_P(QueryProcessorTest, ExcludeTerm) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // just inserting the documents so that they'll bump the @@ -1483,7 +1543,10 @@ TEST_P(QueryProcessorTest, ExcludeNonexistentTerm) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // just inserting the documents so that they'll bump the @@ -1532,7 +1595,10 @@ TEST_P(QueryProcessorTest, ExcludeAnd) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // just inserting the documents so that they'll bump the @@ -1613,7 +1679,10 @@ TEST_P(QueryProcessorTest, ExcludeOr) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // just inserting the documents so that they'll bump the @@ -1697,7 +1766,10 @@ TEST_P(QueryProcessorTest, WithoutTermFrequency) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // just inserting the documents so that the DocHitInfoIterators will see @@ -1793,7 +1865,10 @@ TEST_P(QueryProcessorTest, DeletedFilter) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -1854,7 +1929,10 @@ TEST_P(QueryProcessorTest, NamespaceFilter) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -1917,7 +1995,10 @@ TEST_P(QueryProcessorTest, SchemaTypeFilter) { .AddType(SchemaTypeConfigBuilder().SetType("email")) .AddType(SchemaTypeConfigBuilder().SetType("message")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // inserting the documents to get the appropriate number of documents and @@ -1981,7 +2062,10 @@ TEST_P(QueryProcessorTest, PropertyFilterForOneDocument) { .Build(); // First and only indexed property, so it gets a section_id of 0 int subject_section_id = 0; - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -2050,7 +2134,10 @@ TEST_P(QueryProcessorTest, PropertyFilterAcrossSchemaTypes) { // alphabetically. int email_foo_section_id = 1; int message_foo_section_id = 0; - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -2120,7 +2207,10 @@ TEST_P(QueryProcessorTest, PropertyFilterWithinSchemaType) { .Build(); int email_foo_section_id = 0; int message_foo_section_id = 0; - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // inserting the documents to get the appropriate number of documents and @@ -2208,7 +2298,10 @@ TEST_P(QueryProcessorTest, NestedPropertyFilter) { TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -2269,7 +2362,10 @@ TEST_P(QueryProcessorTest, PropertyFilterRespectsDifferentSectionIds) { .Build(); int email_foo_section_id = 0; int message_foo_section_id = 0; - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -2329,7 +2425,10 @@ TEST_P(QueryProcessorTest, NonexistentPropertyFilterReturnsEmptyResults) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // inserting the documents to get the appropriate number of documents and @@ -2383,7 +2482,10 @@ TEST_P(QueryProcessorTest, UnindexedPropertyFilterReturnsEmptyResults) { .SetDataType(TYPE_STRING) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -2440,7 +2542,10 @@ TEST_P(QueryProcessorTest, PropertyFilterTermAndUnrestrictedTerm) { .Build(); int email_foo_section_id = 0; int message_foo_section_id = 0; - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // inserting the documents to get the appropriate number of documents and @@ -2503,7 +2608,10 @@ TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // Arbitrary value, just has to be less than the document's creation // timestamp + ttl @@ -2561,7 +2669,10 @@ TEST_P(QueryProcessorTest, DocumentPastTtlFilteredOut) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // Arbitrary value, just has to be greater than the document's creation // timestamp + ttl @@ -2634,7 +2745,10 @@ TEST_P(QueryProcessorTest, NumericFilter) { // SectionIds are assigned alphabetically SectionId cost_section_id = 0; SectionId price_section_id = 1; - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); ICING_ASSERT_OK_AND_ASSIGN( DocumentId document_one_id, @@ -2729,7 +2843,10 @@ TEST_P(QueryProcessorTest, NumericFilterWithoutEnablingFeatureFails) { .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); SectionId price_section_id = 0; - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); ICING_ASSERT_OK_AND_ASSIGN( DocumentId document_one_id, diff --git a/icing/query/suggestion-processor_test.cc 
b/icing/query/suggestion-processor_test.cc index 7d45de7..4937f39 100644 --- a/icing/query/suggestion-processor_test.cc +++ b/icing/query/suggestion-processor_test.cc @@ -163,7 +163,10 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_And) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -206,7 +209,10 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_AndNary) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -253,7 +259,10 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_Or) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // inserting the documents to get the appropriate number of documents and @@ -302,7 +311,10 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_OrNary) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -364,7 +376,10 @@ TEST_F(SuggestionProcessorTest, MultipleTermsTest_NormalizedTerm) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -421,7 +436,10 @@ TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // inserting the documents to get the appropriate number of documents and @@ -453,7 +471,10 @@ TEST_F(SuggestionProcessorTest, PrefixTrailingSpaceTest) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -485,7 +506,10 @@ TEST_F(SuggestionProcessorTest, NormalizePrefixTest) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -531,7 +555,10 @@ TEST_F(SuggestionProcessorTest, ParenthesesOperatorPrefixTest) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // inserting the documents to get the appropriate number of documents and @@ -573,7 +600,10 @@ TEST_F(SuggestionProcessorTest, OtherSpecialPrefixTest) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. We're // inserting the documents to get the appropriate number of documents and @@ -629,7 +659,10 @@ TEST_F(SuggestionProcessorTest, InvalidPrefixTest) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // These documents don't actually match to the tokens in the index. 
We're // inserting the documents to get the appropriate number of documents and diff --git a/icing/result/projection-tree.cc b/icing/result/projection-tree.cc index fded576..9896491 100644 --- a/icing/result/projection-tree.cc +++ b/icing/result/projection-tree.cc @@ -22,8 +22,9 @@ namespace icing { namespace lib { -ProjectionTree::ProjectionTree(const TypePropertyMask& type_field_mask) { - for (const std::string& field_mask : type_field_mask.paths()) { +ProjectionTree::ProjectionTree( + const SchemaStore::ExpandedTypePropertyMask& type_field_mask) { + for (const std::string& field_mask : type_field_mask.paths) { Node* current_node = &root_; for (std::string_view sub_field_mask : property_util::SplitPropertyPathExpr(field_mask)) { diff --git a/icing/result/projection-tree.h b/icing/result/projection-tree.h index 5916fe6..cdf268a 100644 --- a/icing/result/projection-tree.h +++ b/icing/result/projection-tree.h @@ -19,14 +19,13 @@ #include <vector> #include "icing/proto/search.pb.h" +#include "icing/schema/schema-store.h" namespace icing { namespace lib { class ProjectionTree { public: - static constexpr std::string_view kSchemaTypeWildcard = "*"; - struct Node { explicit Node(std::string name = "") : name(std::move(name)) {} @@ -38,7 +37,8 @@ class ProjectionTree { } }; - explicit ProjectionTree(const TypePropertyMask& type_field_mask); + explicit ProjectionTree( + const SchemaStore::ExpandedTypePropertyMask& type_field_mask); const Node& root() const { return root_; } diff --git a/icing/result/projection-tree_test.cc b/icing/result/projection-tree_test.cc index 2b0f966..46d0c12 100644 --- a/icing/result/projection-tree_test.cc +++ b/icing/result/projection-tree_test.cc @@ -17,6 +17,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/proto/search.pb.h" +#include "icing/schema/schema-store.h" namespace icing { namespace lib { @@ -28,72 +29,87 @@ using ::testing::IsEmpty; using ::testing::SizeIs; TEST(ProjectionTreeTest, CreateEmptyFieldMasks) { - 
TypePropertyMask type_field_mask; - ProjectionTree tree(type_field_mask); + ProjectionTree tree({}); EXPECT_THAT(tree.root().name, IsEmpty()); EXPECT_THAT(tree.root().children, IsEmpty()); } TEST(ProjectionTreeTest, CreateTreeTopLevel) { - TypePropertyMask type_field_mask; - type_field_mask.add_paths("subject"); + SchemaStore::ExpandedTypePropertyMask type_field_mask{"", {"subject"}}; ProjectionTree tree(type_field_mask); EXPECT_THAT(tree.root().name, IsEmpty()); ASSERT_THAT(tree.root().children, SizeIs(1)); - ASSERT_THAT(tree.root().children.at(0).name, Eq("subject")); - ASSERT_THAT(tree.root().children.at(0).children, IsEmpty()); + EXPECT_THAT(tree.root().children.at(0).name, Eq("subject")); + EXPECT_THAT(tree.root().children.at(0).children, IsEmpty()); } TEST(ProjectionTreeTest, CreateTreeMultipleTopLevel) { - TypePropertyMask type_field_mask; - type_field_mask.add_paths("subject"); - type_field_mask.add_paths("body"); + SchemaStore::ExpandedTypePropertyMask type_field_mask{"", + {"subject", "body"}}; ProjectionTree tree(type_field_mask); EXPECT_THAT(tree.root().name, IsEmpty()); ASSERT_THAT(tree.root().children, SizeIs(2)); - ASSERT_THAT(tree.root().children.at(0).name, Eq("subject")); - ASSERT_THAT(tree.root().children.at(0).children, IsEmpty()); - ASSERT_THAT(tree.root().children.at(1).name, Eq("body")); - ASSERT_THAT(tree.root().children.at(1).children, IsEmpty()); + + const ProjectionTree::Node* child0 = &tree.root().children.at(0); + const ProjectionTree::Node* child1 = &tree.root().children.at(1); + if (child0->name != "subject") { + std::swap(child0, child1); + } + + EXPECT_THAT(child0->name, Eq("subject")); + EXPECT_THAT(child0->children, IsEmpty()); + EXPECT_THAT(child1->name, Eq("body")); + EXPECT_THAT(child1->children, IsEmpty()); } TEST(ProjectionTreeTest, CreateTreeNested) { - TypePropertyMask type_field_mask; - type_field_mask.add_paths("subject.body"); - type_field_mask.add_paths("body"); + SchemaStore::ExpandedTypePropertyMask type_field_mask{ + 
"", {"subject.body", "body"}}; ProjectionTree tree(type_field_mask); EXPECT_THAT(tree.root().name, IsEmpty()); ASSERT_THAT(tree.root().children, SizeIs(2)); - ASSERT_THAT(tree.root().children.at(0).name, Eq("subject")); - ASSERT_THAT(tree.root().children.at(0).children, SizeIs(1)); - ASSERT_THAT(tree.root().children.at(0).children.at(0).name, Eq("body")); - ASSERT_THAT(tree.root().children.at(0).children.at(0).children, IsEmpty()); - ASSERT_THAT(tree.root().children.at(1).name, Eq("body")); - ASSERT_THAT(tree.root().children.at(1).children, IsEmpty()); + + const ProjectionTree::Node* child0 = &tree.root().children.at(0); + const ProjectionTree::Node* child1 = &tree.root().children.at(1); + if (child0->name != "subject.body") { + std::swap(child0, child1); + } + + EXPECT_THAT(child0->name, Eq("subject")); + ASSERT_THAT(child0->children, SizeIs(1)); + EXPECT_THAT(child0->children.at(0).name, Eq("body")); + EXPECT_THAT(child0->children.at(0).children, IsEmpty()); + EXPECT_THAT(child1->name, Eq("body")); + EXPECT_THAT(child1->children, IsEmpty()); } TEST(ProjectionTreeTest, CreateTreeNestedSharedNode) { - TypePropertyMask type_field_mask; - type_field_mask.add_paths("sender.name.first"); - type_field_mask.add_paths("sender.emailAddress"); + SchemaStore::ExpandedTypePropertyMask type_field_mask{ + "", {"sender.name.first", "sender.emailAddress"}}; ProjectionTree tree(type_field_mask); EXPECT_THAT(tree.root().name, IsEmpty()); ASSERT_THAT(tree.root().children, SizeIs(1)); - ASSERT_THAT(tree.root().children.at(0).name, Eq("sender")); + EXPECT_THAT(tree.root().children.at(0).name, Eq("sender")); ASSERT_THAT(tree.root().children.at(0).children, SizeIs(2)); - ASSERT_THAT(tree.root().children.at(0).children.at(0).name, Eq("name")); - ASSERT_THAT(tree.root().children.at(0).children.at(0).children, SizeIs(1)); - ASSERT_THAT(tree.root().children.at(0).children.at(0).children.at(0).name, - Eq("first")); - 
ASSERT_THAT(tree.root().children.at(0).children.at(0).children.at(0).children, - IsEmpty()); - ASSERT_THAT(tree.root().children.at(0).children.at(1).name, - Eq("emailAddress")); - ASSERT_THAT(tree.root().children.at(0).children.at(1).children, IsEmpty()); + + const ProjectionTree::Node* child0_child0 = + &tree.root().children.at(0).children.at(0); + const ProjectionTree::Node* child0_child1 = + &tree.root().children.at(0).children.at(1); + if (child0_child0->name != "name") { + std::swap(child0_child0, child0_child1); + } + + EXPECT_THAT(child0_child0->name, Eq("name")); + ASSERT_THAT(child0_child0->children, SizeIs(1)); + EXPECT_THAT(child0_child0->children.at(0).name, Eq("first")); + EXPECT_THAT(child0_child0->children.at(0).children, IsEmpty()); + EXPECT_THAT(child0_child1->name, Eq("emailAddress")); + EXPECT_THAT(child0_child1->children, IsEmpty()); } } // namespace diff --git a/icing/result/result-adjustment-info.cc b/icing/result/result-adjustment-info.cc index 763cd10..00ac379 100644 --- a/icing/result/result-adjustment-info.cc +++ b/icing/result/result-adjustment-info.cc @@ -22,6 +22,7 @@ #include "icing/proto/term.pb.h" #include "icing/result/projection-tree.h" #include "icing/result/snippet-context.h" +#include "icing/schema/schema-store.h" namespace icing { namespace lib { @@ -46,15 +47,16 @@ SnippetContext CreateSnippetContext(const SearchSpecProto& search_spec, ResultAdjustmentInfo::ResultAdjustmentInfo( const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec, - const ResultSpecProto& result_spec, + const ResultSpecProto& result_spec, const SchemaStore* schema_store, SectionRestrictQueryTermsMap query_terms) : snippet_context(CreateSnippetContext(search_spec, result_spec, std::move(query_terms))), remaining_num_to_snippet(snippet_context.snippet_spec.num_to_snippet()) { - for (const TypePropertyMask& type_field_mask : - result_spec.type_property_masks()) { + for (const SchemaStore::ExpandedTypePropertyMask& type_field_mask : + 
schema_store->ExpandTypePropertyMasks( + result_spec.type_property_masks())) { projection_tree_map.insert( - {type_field_mask.schema_type(), ProjectionTree(type_field_mask)}); + {type_field_mask.schema_type, ProjectionTree(type_field_mask)}); } } diff --git a/icing/result/result-adjustment-info.h b/icing/result/result-adjustment-info.h index 98fa7f5..e859492 100644 --- a/icing/result/result-adjustment-info.h +++ b/icing/result/result-adjustment-info.h @@ -22,6 +22,7 @@ #include "icing/proto/search.pb.h" #include "icing/result/projection-tree.h" #include "icing/result/snippet-context.h" +#include "icing/schema/schema-store.h" namespace icing { namespace lib { @@ -42,6 +43,7 @@ struct ResultAdjustmentInfo { explicit ResultAdjustmentInfo(const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec, const ResultSpecProto& result_spec, + const SchemaStore* schema_store, SectionRestrictQueryTermsMap query_terms); }; diff --git a/icing/result/result-adjustment-info_test.cc b/icing/result/result-adjustment-info_test.cc index 1c5aea1..cbce557 100644 --- a/icing/result/result-adjustment-info_test.cc +++ b/icing/result/result-adjustment-info_test.cc @@ -16,14 +16,19 @@ #include <string> #include <unordered_set> +#include <vector> #include "gtest/gtest.h" -#include "icing/portable/equals-proto.h" #include "icing/proto/scoring.pb.h" #include "icing/proto/search.pb.h" #include "icing/proto/term.pb.h" #include "icing/result/projection-tree.h" #include "icing/result/snippet-context.h" +#include "icing/schema-builder.h" +#include "icing/schema/schema-store.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" +#include "icing/testing/tmp-directory.h" namespace icing { namespace lib { @@ -31,11 +36,44 @@ namespace lib { namespace { using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::AnyOf; using ::testing::Eq; using ::testing::IsEmpty; using ::testing::Pair; using ::testing::UnorderedElementsAre; +class 
ResultAdjustmentInfoTest : public testing::Test { + protected: + ResultAdjustmentInfoTest() : test_dir_(GetTestTempDir() + "/icing") { + filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); + } + + void SetUp() override { + ICING_ASSERT_OK_AND_ASSIGN( + schema_store_, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Email")) + .AddType(SchemaTypeConfigBuilder().SetType("Phone")) + .Build(); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); + } + + void TearDown() override { + filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); + } + + const Filesystem filesystem_; + const std::string test_dir_; + std::unique_ptr<SchemaStore> schema_store_; + FakeClock fake_clock_; +}; + SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) { SearchSpecProto search_spec; search_spec.set_term_match_type(match_type); @@ -57,7 +95,8 @@ ResultSpecProto CreateResultSpec( return result_spec; } -TEST(ResultAdjustmentInfoTest, ShouldConstructSnippetContextAccordingToSpecs) { +TEST_F(ResultAdjustmentInfoTest, + ShouldConstructSnippetContextAccordingToSpecs) { ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE); result_spec.mutable_snippet_spec()->set_num_to_snippet(5); @@ -70,7 +109,7 @@ TEST(ResultAdjustmentInfoTest, ShouldConstructSnippetContextAccordingToSpecs) { ResultAdjustmentInfo result_adjustment_info( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/true), result_spec, - query_terms_map); + schema_store_.get(), query_terms_map); const SnippetContext snippet_context = result_adjustment_info.snippet_context; // Snippet context should be derived from the specs above. 
@@ -84,7 +123,7 @@ TEST(ResultAdjustmentInfoTest, ShouldConstructSnippetContextAccordingToSpecs) { EXPECT_THAT(result_adjustment_info.remaining_num_to_snippet, Eq(5)); } -TEST(ResultAdjustmentInfoTest, NoSnippetingShouldReturnNull) { +TEST_F(ResultAdjustmentInfoTest, NoSnippetingShouldReturnNull) { ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE); // Setting num_to_snippet to 0 so that snippeting info won't be @@ -99,7 +138,7 @@ TEST(ResultAdjustmentInfoTest, NoSnippetingShouldReturnNull) { ResultAdjustmentInfo result_adjustment_info( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/true), result_spec, - query_terms_map); + schema_store_.get(), query_terms_map); EXPECT_THAT(result_adjustment_info.snippet_context.query_terms, IsEmpty()); EXPECT_THAT( @@ -110,8 +149,8 @@ TEST(ResultAdjustmentInfoTest, NoSnippetingShouldReturnNull) { EXPECT_THAT(result_adjustment_info.remaining_num_to_snippet, Eq(0)); } -TEST(ResultAdjustmentInfoTest, - ShouldConstructProjectionTreeMapAccordingToSpecs) { +TEST_F(ResultAdjustmentInfoTest, + ShouldConstructProjectionTreeMapAccordingToSpecs) { // Create a ResultSpec with type property mask. 
ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE); @@ -127,20 +166,30 @@ TEST(ResultAdjustmentInfoTest, TypePropertyMask* wildcard_type_property_mask = result_spec.add_type_property_masks(); wildcard_type_property_mask->set_schema_type( - std::string(ProjectionTree::kSchemaTypeWildcard)); + std::string(SchemaStore::kSchemaTypeWildcard)); wildcard_type_property_mask->add_paths("wild.card"); ResultAdjustmentInfo result_adjustment_info( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/true), result_spec, + schema_store_.get(), /*query_terms=*/{}); + ProjectionTree email_projection_tree = + ProjectionTree({"Email", {"sender.name", "sender.emailAddress"}}); + ProjectionTree alternative_email_projection_tree = + ProjectionTree({"Email", {"sender.emailAddress", "sender.name"}}); + ProjectionTree phone_projection_tree = ProjectionTree({"Phone", {"caller"}}); + ProjectionTree wildcard_projection_tree = ProjectionTree( + {std::string(SchemaStore::kSchemaTypeWildcard), {"wild.card"}}); + EXPECT_THAT(result_adjustment_info.projection_tree_map, UnorderedElementsAre( - Pair("Email", ProjectionTree(*email_type_property_mask)), - Pair("Phone", ProjectionTree(*phone_type_property_mask)), - Pair(std::string(ProjectionTree::kSchemaTypeWildcard), - ProjectionTree(*wildcard_type_property_mask)))); + Pair("Email", AnyOf(email_projection_tree, + alternative_email_projection_tree)), + Pair("Phone", phone_projection_tree), + Pair(std::string(SchemaStore::kSchemaTypeWildcard), + wildcard_projection_tree))); } } // namespace diff --git a/icing/result/result-retriever-v2.cc b/icing/result/result-retriever-v2.cc index a617f45..c7a8fcd 100644 --- a/icing/result/result-retriever-v2.cc +++ b/icing/result/result-retriever-v2.cc @@ -14,12 +14,16 @@ #include "icing/result/result-retriever-v2.h" +#include <cstddef> +#include <cstdint> #include <memory> +#include <string> #include <unordered_map> #include <utility> 
#include <vector> #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/mutex.h" #include "icing/proto/document.pb.h" #include "icing/proto/search.pb.h" #include "icing/result/page-result.h" @@ -29,12 +33,15 @@ #include "icing/result/result-state-v2.h" #include "icing/result/snippet-context.h" #include "icing/result/snippet-retriever.h" +#include "icing/schema/schema-store.h" #include "icing/schema/section.h" #include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-filter-data.h" #include "icing/store/document-store.h" #include "icing/store/namespace-id.h" #include "icing/tokenization/language-segmenter.h" #include "icing/transform/normalizer.h" +#include "icing/util/logging.h" #include "icing/util/status-macros.h" namespace icing { @@ -54,7 +61,7 @@ void ApplyProjection(const ResultAdjustmentInfo* adjustment_info, } else { auto wildcard_projection_tree_itr = adjustment_info->projection_tree_map.find( - std::string(ProjectionTree::kSchemaTypeWildcard)); + std::string(SchemaStore::kSchemaTypeWildcard)); if (wildcard_projection_tree_itr != adjustment_info->projection_tree_map.end()) { projector::Project(wildcard_projection_tree_itr->second.root().children, @@ -199,6 +206,11 @@ std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage( // Retrieve child documents for (const ScoredDocumentHit& child_scored_document_hit : next_best_document_hit.child_scored_document_hits()) { + if (result.joined_results_size() >= + result_state.max_joined_children_per_parent_to_return()) { + break; + } + libtextclassifier3::StatusOr<DocumentProto> child_document_or = doc_store_.Get(child_scored_document_hit.document_id()); if (!child_document_or.ok()) { diff --git a/icing/result/result-retriever-v2.h b/icing/result/result-retriever-v2.h index 48fb88d..0499ae1 100644 --- a/icing/result/result-retriever-v2.h +++ b/icing/result/result-retriever-v2.h @@ -15,19 +15,20 @@ #ifndef ICING_RESULT_RETRIEVER_V2_H_ #define 
ICING_RESULT_RETRIEVER_V2_H_ +#include <cstdint> #include <memory> #include <unordered_map> #include <utility> #include <vector> #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/proto/search.pb.h" #include "icing/result/page-result.h" #include "icing/result/result-state-v2.h" #include "icing/result/snippet-retriever.h" #include "icing/schema/schema-store.h" #include "icing/scoring/scored-document-hit.h" #include "icing/store/document-store.h" -#include "icing/store/namespace-id.h" #include "icing/tokenization/language-segmenter.h" #include "icing/transform/normalizer.h" diff --git a/icing/result/result-retriever-v2_group-result-limiter_test.cc b/icing/result/result-retriever-v2_group-result-limiter_test.cc index d4aaa38..c9e0587 100644 --- a/icing/result/result-retriever-v2_group-result-limiter_test.cc +++ b/icing/result/result-retriever-v2_group-result-limiter_test.cc @@ -83,7 +83,9 @@ class ResultRetrieverV2GroupResultLimiterTest : public testing::Test { schema.add_types()->set_schema_type("Document"); schema.add_types()->set_schema_type("Message"); schema.add_types()->set_schema_type("Person"); - ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema))); + ICING_ASSERT_OK(schema_store_->SetSchema( + std::move(schema), /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, diff --git a/icing/result/result-retriever-v2_projection_test.cc b/icing/result/result-retriever-v2_projection_test.cc index 94580d4..377e14c 100644 --- a/icing/result/result-retriever-v2_projection_test.cc +++ b/icing/result/result-retriever-v2_projection_test.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include <limits> #include <memory> #include <vector> @@ -109,8 +110,77 @@ class ResultRetrieverV2ProjectionTest : public testing::Test { .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType( + SchemaTypeConfigBuilder() + .SetType("Artist") + .AddParentType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType( + SchemaTypeConfigBuilder() + .SetType("Musician") + .AddParentType("Artist") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType( + SchemaTypeConfigBuilder() + .SetType("WithPhone") + .AddProperty(PropertyConfigBuilder() + .SetName("phoneNumber") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("phoneModel") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("PersonWithPhone") + .AddParentType("Person") + .AddParentType("WithPhone") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("phoneNumber") + 
.SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("phoneModel") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build()) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -232,7 +302,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionTopLevelLeadNodeFieldPath) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( @@ -330,7 +400,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionNestedLeafNodeFieldPath) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( @@ -439,7 +509,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionIntermediateNodeFieldPath) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( @@ -552,7 +622,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleNestedFieldPaths) { 
std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( @@ -648,7 +718,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionEmptyFieldPath) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( @@ -727,7 +797,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionInvalidFieldPath) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( @@ -807,7 +877,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionValidAndInvalidFieldPath) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( @@ -889,7 +959,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesNoWildcards) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/nullptr, result_spec, 
*document_store_); ICING_ASSERT_OK_AND_ASSIGN( @@ -963,7 +1033,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesWildcard) { TypePropertyMask* wildcard_type_property_mask = result_spec.add_type_property_masks(); wildcard_type_property_mask->set_schema_type( - std::string(ProjectionTree::kSchemaTypeWildcard)); + std::string(SchemaStore::kSchemaTypeWildcard)); wildcard_type_property_mask->add_paths("name"); // 4. Create ResultState with custom ResultSpec. @@ -975,7 +1045,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesWildcard) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( @@ -1053,7 +1123,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, TypePropertyMask* wildcard_type_property_mask = result_spec.add_type_property_masks(); wildcard_type_property_mask->set_schema_type( - std::string(ProjectionTree::kSchemaTypeWildcard)); + std::string(SchemaStore::kSchemaTypeWildcard)); wildcard_type_property_mask->add_paths("name"); // 4. Create ResultState with custom ResultSpec. 
@@ -1065,7 +1135,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( @@ -1152,7 +1222,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, TypePropertyMask* wildcard_type_property_mask = result_spec.add_type_property_masks(); wildcard_type_property_mask->set_schema_type( - std::string(ProjectionTree::kSchemaTypeWildcard)); + std::string(SchemaStore::kSchemaTypeWildcard)); wildcard_type_property_mask->add_paths("name"); // 4. Create ResultState with custom ResultSpec. @@ -1164,7 +1234,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( @@ -1255,7 +1325,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, TypePropertyMask* wildcard_type_property_mask = result_spec.add_type_property_masks(); wildcard_type_property_mask->set_schema_type( - std::string(ProjectionTree::kSchemaTypeWildcard)); + std::string(SchemaStore::kSchemaTypeWildcard)); wildcard_type_property_mask->add_paths("sender"); // 4. Create ResultState with custom ResultSpec. 
@@ -1267,7 +1337,7 @@ TEST_F(ResultRetrieverV2ProjectionTest, std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( @@ -1369,6 +1439,8 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionJoinDocuments) { // 4. Create parent ResultSpec with type property mask. ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/2); + parent_result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int>::max()); TypePropertyMask* type_property_mask = parent_result_spec.add_type_property_masks(); type_property_mask->set_schema_type("Person"); @@ -1389,12 +1461,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionJoinDocuments) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/ std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), child_result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), parent_result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( @@ -1446,6 +1518,384 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionJoinDocuments) { EqualsProto(projected_email_document2)); } +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionPolymorphism) { + // 1. 
Add two documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Artist") + .AddStringProperty("name", "Joe Artist") + .AddStringProperty("emailAddress", "artist@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, kSectionIdMaskAll, /*score=*/0}, + {document_id2, kSectionIdMaskAll, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + // Since Artist is a child type of Person, the TypePropertyMask for Person + // also applies to Artist. + TypePropertyMask* person_type_property_mask = + result_spec.add_type_property_masks(); + person_type_property_mask->set_schema_type("Person"); + person_type_property_mask->add_paths("name"); + + // 4. Create ResultState with custom ResultSpec. 
+ ResultStateV2 result_state( + std::make_unique< + PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( + std::move(scored_document_hits), /*is_descending=*/false), + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned Person and Artist results only contain the + // 'name' property. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Artist") + .AddStringProperty("name", "Joe Artist") + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionTransitivePolymorphism) { + // 1. 
Add two documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Musician") + .AddStringProperty("name", "Joe Musician") + .AddStringProperty("emailAddress", "Musician@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, kSectionIdMaskAll, /*score=*/0}, + {document_id2, kSectionIdMaskAll, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + // Since Musician is a transitive child type of Person, the TypePropertyMask + // for Person also applies to Musician. + TypePropertyMask* person_type_property_mask = + result_spec.add_type_property_masks(); + person_type_property_mask->set_schema_type("Person"); + person_type_property_mask->add_paths("name"); + + // 4. Create ResultState with custom ResultSpec. 
+ ResultStateV2 result_state( + std::make_unique< + PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( + std::move(scored_document_hits), /*is_descending=*/false), + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned Person and Musician results only contain the + // 'name' property. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Musician") + .AddStringProperty("name", "Joe Musician") + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, + ProjectionPolymorphismChildMissingProperty) { + // 1. Add an artist document with missing 'emailAddress', which is allowed + // since 'emailAddress' in the parent type 'Person' is defined as optional. 
+ DocumentProto document = DocumentBuilder() + .SetKey("namespace", "uri") + .SetCreationTimestampMs(1000) + .SetSchema("Artist") + .AddStringProperty("name", "Joe Artist") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + document_store_->Put(document)); + + // 2. Setup the scored results. + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id, kSectionIdMaskAll, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask for the missing property + // 'emailAddress' in the Person type. Since Artist is a child type of Person, + // the TypePropertyMask for Person also applies to Artist. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* person_type_property_mask = + result_spec.add_type_property_masks(); + person_type_property_mask->set_schema_type("Person"); + person_type_property_mask->add_paths("emailAddress"); + + // 4. Create ResultState with custom ResultSpec. + ResultStateV2 result_state( + std::make_unique< + PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( + std::move(scored_document_hits), /*is_descending=*/false), + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned person document does not contain any property, + // since 'emailAddress' is missing. 
+ PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(1)); + DocumentProto projected_document = DocumentBuilder() + .SetKey("namespace", "uri") + .SetCreationTimestampMs(1000) + .SetSchema("Artist") + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionPolymorphismMerge) { + // 1. Add two documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Artist") + .AddStringProperty("name", "Joe Artist") + .AddStringProperty("emailAddress", "artist@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(document_two)); + + // 2. Setup the scored results. + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, kSectionIdMaskAll, /*score=*/0}, + {document_id2, kSectionIdMaskAll, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* person_type_property_mask = + result_spec.add_type_property_masks(); + person_type_property_mask->set_schema_type("Person"); + person_type_property_mask->add_paths("name"); + // Since Artist is a child type of Person, the TypePropertyMask for Person + // will be merged to Artist's TypePropertyMask by polymorphism, so that 'name' + // will also show in Artist's projection results. 
+ TypePropertyMask* artist_type_property_mask = + result_spec.add_type_property_masks(); + artist_type_property_mask->set_schema_type("Artist"); + artist_type_property_mask->add_paths("emailAddress"); + + // 4. Create ResultState with custom ResultSpec. + ResultStateV2 result_state( + std::make_unique< + PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( + std::move(scored_document_hits), /*is_descending=*/false), + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned Person results only contain the 'name' + // property and the returned Artist results contain both the 'name' and + // 'emailAddress' properties. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Artist") + .AddStringProperty("name", "Joe Artist") + .AddStringProperty("emailAddress", "artist@aol.com") + .Build(); + EXPECT_THAT(page_result.results.at(1).document(), + EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleParentPolymorphism) { + // 1. 
Add a document + DocumentProto document = DocumentBuilder() + .SetKey("namespace", "uri") + .SetCreationTimestampMs(1000) + .SetSchema("PersonWithPhone") + .AddStringProperty("name", "name") + .AddStringProperty("emailAddress", "email") + .AddStringProperty("phoneNumber", "12345") + .AddStringProperty("phoneModel", "pixel") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + document_store_->Put(document)); + + // 2. Setup the scored results. + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id, kSectionIdMaskAll, /*score=*/0}}; + + // 3. Create a ResultSpec with type property mask. + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/1); + // Since PersonWithPhone is a child type of Person, the TypePropertyMask + // also applies to PersonWithPhone. + TypePropertyMask* person_type_property_mask = + result_spec.add_type_property_masks(); + person_type_property_mask->set_schema_type("Person"); + person_type_property_mask->add_paths("name"); + // Since PersonWithPhone is a child type of WithPhone, the + // TypePropertyMask also applies to PersonWithPhone. + TypePropertyMask* with_phone_type_property_mask = + result_spec.add_type_property_masks(); + with_phone_type_property_mask->set_schema_type("WithPhone"); + with_phone_type_property_mask->add_paths("phoneNumber"); + + // 4. Create ResultState with custom ResultSpec. 
+ ResultStateV2 result_state( + std::make_unique< + PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( + std::move(scored_document_hits), /*is_descending=*/false), + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 5. Verify that the returned document only contains the 'name' and the + // 'phoneNumber' property. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(1)); + + DocumentProto projected_document = + DocumentBuilder() + .SetKey("namespace", "uri") + .SetCreationTimestampMs(1000) + .SetSchema("PersonWithPhone") + .AddStringProperty("name", "name") + .AddStringProperty("phoneNumber", "12345") + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_document)); +} + } // namespace } // namespace lib diff --git a/icing/result/result-retriever-v2_snippet_test.cc b/icing/result/result-retriever-v2_snippet_test.cc index 3dce0ef..b2ba8f7 100644 --- a/icing/result/result-retriever-v2_snippet_test.cc +++ b/icing/result/result-retriever-v2_snippet_test.cc @@ -102,7 +102,10 @@ class ResultRetrieverV2SnippetTest : public testing::Test { .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); ICING_ASSERT_OK_AND_ASSIGN( 
DocumentStore::CreateResult create_result, @@ -236,7 +239,7 @@ TEST_F(ResultRetrieverV2SnippetTest, std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/true), result_spec, - SectionRestrictQueryTermsMap()), + schema_store_.get(), SectionRestrictQueryTermsMap()), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); PageResult page_result = result_retriever->RetrieveNextPage(result_state).first; @@ -285,6 +288,7 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); @@ -393,6 +397,7 @@ TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); @@ -468,6 +473,7 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllResults) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); @@ -520,6 +526,7 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeResults) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); { @@ -575,6 
+582,7 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldNotSnippetAnyResults) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); { @@ -632,6 +640,7 @@ TEST_F(ResultRetrieverV2SnippetTest, std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), /*child_adjustment_info=*/nullptr, result_spec, *document_store_); @@ -718,6 +727,8 @@ TEST_F(ResultRetrieverV2SnippetTest, JoinSnippeted) { // Create parent ResultSpec with custom snippet spec. ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3); + parent_result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); *parent_result_spec.mutable_snippet_spec() = CreateSnippetSpec(); // Create child ResultSpec with custom snippet spec. 
@@ -735,11 +746,13 @@ TEST_F(ResultRetrieverV2SnippetTest, JoinSnippeted) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap({{"", {"person"}}})), /*child_adjustment_info=*/ std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), child_result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), parent_result_spec, *document_store_); @@ -939,6 +952,8 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllJoinedResults) { ResultSpecProto::SnippetSpecProto parent_snippet_spec = CreateSnippetSpec(); parent_snippet_spec.set_num_to_snippet(1); ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3); + parent_result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); *parent_result_spec.mutable_snippet_spec() = std::move(parent_snippet_spec); // Create child ResultSpec with custom snippet spec. 
@@ -957,11 +972,13 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllJoinedResults) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap({{"", {"person"}}})), /*child_adjustment_info=*/ std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), child_result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), parent_result_spec, *document_store_); @@ -1051,6 +1068,8 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeJoinedResults) { ResultSpecProto::SnippetSpecProto parent_snippet_spec = CreateSnippetSpec(); parent_snippet_spec.set_num_to_snippet(3); ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3); + parent_result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); *parent_result_spec.mutable_snippet_spec() = std::move(parent_snippet_spec); // Create child ResultSpec with custom snippet spec. 
@@ -1069,11 +1088,13 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeJoinedResults) { std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap({{"", {"person"}}})), /*child_adjustment_info=*/ std::make_unique<ResultAdjustmentInfo>( CreateSearchSpec(TermMatchType::EXACT_ONLY), CreateScoringSpec(/*is_descending_order=*/false), child_result_spec, + schema_store_.get(), SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), parent_result_spec, *document_store_); diff --git a/icing/result/result-retriever-v2_test.cc b/icing/result/result-retriever-v2_test.cc index 462d535..411562b 100644 --- a/icing/result/result-retriever-v2_test.cc +++ b/icing/result/result-retriever-v2_test.cc @@ -15,17 +15,27 @@ #include "icing/result/result-retriever-v2.h" #include <atomic> +#include <cstddef> +#include <cstdint> #include <memory> +#include <string> #include <unordered_map> +#include <utility> #include <vector> +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "icing/absl_ports/mutex.h" #include "icing/document-builder.h" +#include "icing/file/filesystem.h" #include "icing/file/mock-filesystem.h" +#include "icing/file/portable-file-backed-proto-log.h" #include "icing/portable/equals-proto.h" #include "icing/portable/platform.h" #include "icing/proto/document.pb.h" +#include "icing/proto/document_wrapper.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/search.pb.h" #include "icing/result/page-result.h" @@ -35,15 +45,19 @@ #include "icing/schema/section.h" #include "icing/scoring/priority-queue-scored-document-hits-ranker.h" #include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" +#include 
"icing/store/document-store.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" #include "icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" +#include "icing/tokenization/language-segmenter.h" #include "icing/transform/normalizer-factory.h" #include "icing/transform/normalizer.h" +#include "icing/util/clock.h" #include "unicode/uloc.h" namespace icing { @@ -134,7 +148,10 @@ class ResultRetrieverV2Test : public ::testing::Test { TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); num_total_hits_ = 0; } @@ -394,6 +411,159 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) { ElementsAre(EqualsProto(result1), EqualsProto(result2))); } +TEST_F(ResultRetrieverV2Test, + ShouldLimitNumChildDocumentsByMaxJoinedChildPerParent) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + CreateDocumentStore(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + // 1. 
Add 2 Person document + DocumentProto person_document1 = + DocumentBuilder() + .SetKey("namespace", "Person/1") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id1, + doc_store->Put(person_document1)); + + DocumentProto person_document2 = + DocumentBuilder() + .SetKey("namespace", "Person/2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "shopgirl@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id2, + doc_store->Put(person_document2)); + + // 2. Add 4 Email documents + DocumentProto email_document1 = DocumentBuilder() + .SetKey("namespace", "Email/1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Test 1") + .AddStringProperty("body", "Test 1") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id1, + doc_store->Put(email_document1)); + + DocumentProto email_document2 = DocumentBuilder() + .SetKey("namespace", "Email/2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Test 2") + .AddStringProperty("body", "Test 2") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id2, + doc_store->Put(email_document2)); + + DocumentProto email_document3 = DocumentBuilder() + .SetKey("namespace", "Email/3") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Test 3") + .AddStringProperty("body", "Test 3") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id3, + doc_store->Put(email_document3)); + + DocumentProto email_document4 = DocumentBuilder() + .SetKey("namespace", "Email/4") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Test 4") + .AddStringProperty("body", "Test 4") + .Build(); + 
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id4, + doc_store->Put(email_document4)); + + // 3. Setup the joined scored results. + std::vector<SectionId> person_hit_section_ids = { + GetSectionId("Person", "name")}; + std::vector<SectionId> email_hit_section_ids = { + GetSectionId("Email", "name"), GetSectionId("Email", "body")}; + SectionIdMask person_hit_section_id_mask = + CreateSectionIdMask(person_hit_section_ids); + SectionIdMask email_hit_section_id_mask = + CreateSectionIdMask(email_hit_section_ids); + + ScoredDocumentHit person1_scored_doc_hit( + person_document_id1, person_hit_section_id_mask, /*score=*/1); + ScoredDocumentHit person2_scored_doc_hit( + person_document_id2, person_hit_section_id_mask, /*score=*/2); + ScoredDocumentHit email1_scored_doc_hit( + email_document_id1, email_hit_section_id_mask, /*score=*/3); + ScoredDocumentHit email2_scored_doc_hit( + email_document_id2, email_hit_section_id_mask, /*score=*/4); + ScoredDocumentHit email3_scored_doc_hit( + email_document_id3, email_hit_section_id_mask, /*score=*/5); + ScoredDocumentHit email4_scored_doc_hit( + email_document_id4, email_hit_section_id_mask, /*score=*/6); + // Create JoinedScoredDocumentHits mapping: + // - Person1 to Email1 + // - Person2 to Email2, Email3, Email4 + std::vector<JoinedScoredDocumentHit> joined_scored_document_hits = { + JoinedScoredDocumentHit( + /*final_score=*/1, + /*parent_scored_document_hit=*/person1_scored_doc_hit, + /*child_scored_document_hits=*/{email1_scored_doc_hit}), + JoinedScoredDocumentHit( + /*final_score=*/3, + /*parent_scored_document_hit=*/person2_scored_doc_hit, + /*child_scored_document_hits=*/ + {email4_scored_doc_hit, email3_scored_doc_hit, + email2_scored_doc_hit})}; + + // 4. Retrieve result with max_joined_children_per_parent_to_return = 2. 
+ ResultSpecProto result_spec = + CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE); + result_spec.set_max_joined_children_per_parent_to_return(2); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(doc_store.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + ResultStateV2 result_state( + std::make_unique< + PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>( + std::move(joined_scored_document_hits), /*is_descending=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *doc_store); + + // Result1: person2 with child docs = [email4, email3] + SearchResultProto::ResultProto result1; + *result1.mutable_document() = person_document2; + result1.set_score(3); + SearchResultProto::ResultProto* child1 = result1.add_joined_results(); + *child1->mutable_document() = email_document4; + child1->set_score(6); + SearchResultProto::ResultProto* child2 = result1.add_joined_results(); + *child2->mutable_document() = email_document3; + child2->set_score(5); + + // Result2: person1 with child docs = [email1] + SearchResultProto::ResultProto result2; + *result2.mutable_document() = person_document1; + result2.set_score(1); + SearchResultProto::ResultProto* child3 = result2.add_joined_results(); + *child3->mutable_document() = email_document1; + child3->set_score(3); + + auto [page_result, has_more_results] = + result_retriever->RetrieveNextPage(result_state); + EXPECT_THAT(page_result.results, + ElementsAre(EqualsProto(result1), EqualsProto(result2))); + // No more results. 
+ EXPECT_FALSE(has_more_results); +} + TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) { MockFilesystem mock_filesystem; EXPECT_CALL(mock_filesystem, diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc index 44bfe2d..ce4589b 100644 --- a/icing/result/result-state-manager_test.cc +++ b/icing/result/result-state-manager_test.cc @@ -98,7 +98,9 @@ class ResultStateManagerTest : public testing::Test { SchemaStore::Create(&filesystem_, test_dir_, clock_.get())); SchemaProto schema; schema.add_types()->set_schema_type("Document"); - ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema))); + ICING_ASSERT_OK(schema_store_->SetSchema( + std::move(schema), /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create( /*max_term_byte_size=*/10000)); @@ -162,6 +164,9 @@ class ResultStateManagerTest : public testing::Test { DocumentStore& document_store() { return *document_store_; } const DocumentStore& document_store() const { return *document_store_; } + SchemaStore& schema_store() { return *schema_store_; } + const SchemaStore& schema_store() const { return *schema_store_; } + const ResultRetrieverV2& result_retriever() const { return *result_retriever_; } @@ -436,7 +441,8 @@ TEST_F(ResultStateManagerTest, std::move(scored_document_hits1), /*is_descending=*/true), /*parent_adjustment_info=*/ std::make_unique<ResultAdjustmentInfo>(search_spec, scoring_spec, - result_spec, query_terms), + result_spec, &schema_store(), + query_terms), /*child_adjustment_info=*/nullptr, result_spec, document_store(), result_retriever())); ASSERT_THAT(page_result_info1.first, Not(Eq(kInvalidNextPageToken))); @@ -451,7 +457,8 @@ TEST_F(ResultStateManagerTest, std::move(scored_document_hits2), /*is_descending=*/true), /*parent_adjustment_info=*/ std::make_unique<ResultAdjustmentInfo>(search_spec, scoring_spec, - result_spec, query_terms), 
+ result_spec, &schema_store(), + query_terms), /*child_adjustment_info=*/nullptr, result_spec, document_store(), result_retriever())); diff --git a/icing/result/result-state-manager_thread-safety_test.cc b/icing/result/result-state-manager_thread-safety_test.cc index 670578f..06eaaf4 100644 --- a/icing/result/result-state-manager_thread-safety_test.cc +++ b/icing/result/result-state-manager_thread-safety_test.cc @@ -91,7 +91,9 @@ class ResultStateManagerThreadSafetyTest : public testing::Test { SchemaStore::Create(&filesystem_, test_dir_, clock_.get())); SchemaProto schema; schema.add_types()->set_schema_type("Document"); - ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema))); + ICING_ASSERT_OK(schema_store_->SetSchema( + std::move(schema), /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create( /*max_term_byte_size=*/10000)); diff --git a/icing/result/result-state-v2.cc b/icing/result/result-state-v2.cc index 9459910..3aa9359 100644 --- a/icing/result/result-state-v2.cc +++ b/icing/result/result-state-v2.cc @@ -40,6 +40,8 @@ ResultStateV2::ResultStateV2( num_per_page_(result_spec.num_per_page()), num_total_bytes_per_page_threshold_( result_spec.num_total_bytes_per_page_threshold()), + max_joined_children_per_parent_to_return_( + result_spec.max_joined_children_per_parent_to_return()), num_total_hits_(nullptr), result_group_type_(result_spec.result_group_type()) { for (const ResultSpecProto::ResultGrouping& result_grouping : diff --git a/icing/result/result-state-v2.h b/icing/result/result-state-v2.h index b01bee9..919710e 100644 --- a/icing/result/result-state-v2.h +++ b/icing/result/result-state-v2.h @@ -22,6 +22,7 @@ #include <vector> #include "icing/absl_ports/mutex.h" +#include "icing/absl_ports/thread_annotations.h" #include "icing/proto/search.pb.h" #include "icing/result/result-adjustment-info.h" #include 
"icing/scoring/scored-document-hits-ranker.h" @@ -86,7 +87,7 @@ class ResultStateV2 { return entry_id_group_id_map_; } - int num_per_page() const ICING_SHARED_LOCKS_REQUIRED(mutex) { + int32_t num_per_page() const ICING_SHARED_LOCKS_REQUIRED(mutex) { return num_per_page_; } @@ -95,6 +96,11 @@ class ResultStateV2 { return num_total_bytes_per_page_threshold_; } + int32_t max_joined_children_per_parent_to_return() const + ICING_SHARED_LOCKS_REQUIRED(mutex) { + return max_joined_children_per_parent_to_return_; + } + ResultSpecProto::ResultGroupingType result_group_type() ICING_SHARED_LOCKS_REQUIRED(mutex) { return result_group_type_; @@ -139,7 +145,7 @@ class ResultStateV2 { ICING_GUARDED_BY(mutex); // Number of results to return in each page. - int num_per_page_ ICING_GUARDED_BY(mutex); + int32_t num_per_page_ ICING_GUARDED_BY(mutex); // The threshold of total bytes of all documents to cutoff, in order to limit // # of bytes in a single page. @@ -149,6 +155,10 @@ class ResultStateV2 { // threshold too much. int32_t num_total_bytes_per_page_threshold_ ICING_GUARDED_BY(mutex); + // Max # of joined child documents to be attached in the result for each + // parent document. + int32_t max_joined_children_per_parent_to_return_ ICING_GUARDED_BY(mutex); + // Pointer to a global counter to sum up the size of scored_document_hits in // all ResultStates. // Does not own. 
diff --git a/icing/result/result-state-v2_test.cc b/icing/result/result-state-v2_test.cc index 35b6401..ab29d6e 100644 --- a/icing/result/result-state-v2_test.cc +++ b/icing/result/result-state-v2_test.cc @@ -16,19 +16,26 @@ #include <atomic> #include <cstdint> +#include <limits> #include <memory> #include <string> +#include <utility> #include <vector> +#include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/absl_ports/mutex.h" #include "icing/file/filesystem.h" +#include "icing/file/portable-file-backed-proto-log.h" #include "icing/proto/document.pb.h" +#include "icing/proto/document_wrapper.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/search.pb.h" #include "icing/schema/schema-store.h" +#include "icing/schema/section.h" #include "icing/scoring/priority-queue-scored-document-hits-ranker.h" #include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-id.h" #include "icing/store/document-store.h" #include "icing/testing/common-matchers.h" #include "icing/testing/tmp-directory.h" @@ -61,7 +68,9 @@ class ResultStateV2Test : public ::testing::Test { SchemaStore::Create(&filesystem_, schema_store_base_dir_, &clock_)); SchemaProto schema; schema.add_types()->set_schema_type("Document"); - ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema))); + ICING_ASSERT_OK(schema_store_->SetSchema( + std::move(schema), /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); doc_store_base_dir_ = GetTestTempDir() + "/document_store"; filesystem_.CreateDirectoryRecursively(doc_store_base_dir_.c_str()); @@ -113,6 +122,7 @@ TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToSpecs) { ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE); result_spec.set_num_total_bytes_per_page_threshold(4096); + result_spec.set_max_joined_children_per_parent_to_return(2048); // Adjustment info is not important in this test. 
ResultStateV2 result_state( @@ -128,6 +138,8 @@ TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToSpecs) { EXPECT_THAT(result_state.num_per_page(), Eq(result_spec.num_per_page())); EXPECT_THAT(result_state.num_total_bytes_per_page_threshold(), Eq(result_spec.num_total_bytes_per_page_threshold())); + EXPECT_THAT(result_state.max_joined_children_per_parent_to_return(), + Eq(result_spec.max_joined_children_per_parent_to_return())); } TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToDefaultSpecs) { @@ -152,6 +164,9 @@ TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToDefaultSpecs) { Eq(default_result_spec.num_per_page())); EXPECT_THAT(result_state.num_total_bytes_per_page_threshold(), Eq(default_result_spec.num_total_bytes_per_page_threshold())); + EXPECT_THAT( + result_state.max_joined_children_per_parent_to_return(), + Eq(default_result_spec.max_joined_children_per_parent_to_return())); } TEST_F(ResultStateV2Test, diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc index 2c4023c..fcaba4c 100644 --- a/icing/result/snippet-retriever.cc +++ b/icing/result/snippet-retriever.cc @@ -488,9 +488,7 @@ void GetEntriesFromProperty(const PropertyProto* current_property, current_property->string_values_size(), /*index=*/i, property_path)); std::string_view value = current_property->string_values(i); std::unique_ptr<Tokenizer::Iterator> iterator = - tokenizer - ->Tokenize(value, LanguageSegmenter::AccessType::kForwardIterator) - .ValueOrDie(); + tokenizer->Tokenize(value).ValueOrDie(); // All iterators are moved through positions sequentially. Constructing them // each time resets them to the beginning of the string. 
This means that, // for t tokens and in a string of n chars, each MoveToUtf8 call from the diff --git a/icing/result/snippet-retriever_benchmark.cc b/icing/result/snippet-retriever_benchmark.cc index 9af8efa..e574325 100644 --- a/icing/result/snippet-retriever_benchmark.cc +++ b/icing/result/snippet-retriever_benchmark.cc @@ -104,7 +104,9 @@ void BM_SnippetOneProperty(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem, schema_dir, &clock)); - ICING_ASSERT_OK(schema_store->SetSchema(schema)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); auto snippet_retriever = SnippetRetriever::Create(schema_store.get(), language_segmenter.get(), @@ -231,7 +233,9 @@ void BM_SnippetRfcOneProperty(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem, schema_dir, &clock)); - ICING_ASSERT_OK(schema_store->SetSchema(schema)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); auto snippet_retriever = SnippetRetriever::Create(schema_store.get(), language_segmenter.get(), diff --git a/icing/result/snippet-retriever_test.cc b/icing/result/snippet-retriever_test.cc index 80d00d5..8d81b43 100644 --- a/icing/result/snippet-retriever_test.cc +++ b/icing/result/snippet-retriever_test.cc @@ -113,7 +113,9 @@ class SnippetRetrieverTest : public testing::Test { TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - ICING_ASSERT_OK(schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create( /*max_term_byte_size=*/10000)); @@ -1021,7 +1023,8 @@ 
TEST_F(SnippetRetrieverTest, SnippetingTestOneLevel) { .SetCardinality(CARDINALITY_REPEATED))) .Build(); ICING_ASSERT_OK(schema_store_->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true)); + schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( snippet_retriever_, SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), @@ -1111,7 +1114,8 @@ TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevel) { .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); ICING_ASSERT_OK(schema_store_->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true)); + schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( snippet_retriever_, SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), @@ -1217,7 +1221,8 @@ TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelRepeated) { .SetCardinality(CARDINALITY_REPEATED))) .Build(); ICING_ASSERT_OK(schema_store_->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true)); + schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( snippet_retriever_, SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), @@ -1331,7 +1336,8 @@ TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelSingleValue) { .SetCardinality(CARDINALITY_REPEATED))) .Build(); ICING_ASSERT_OK(schema_store_->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true)); + schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( snippet_retriever_, SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), @@ -1604,7 +1610,8 @@ TEST_F(SnippetRetrieverTest, SnippettingVerbatimAscii) { .SetCardinality(CARDINALITY_REPEATED))) .Build(); ICING_ASSERT_OK(schema_store_->SetSchema( - schema, 
/*ignore_errors_and_delete_documents=*/true)); + schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( snippet_retriever_, SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), @@ -1657,7 +1664,8 @@ TEST_F(SnippetRetrieverTest, SnippettingVerbatimCJK) { .SetCardinality(CARDINALITY_REPEATED))) .Build(); ICING_ASSERT_OK(schema_store_->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true)); + schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( snippet_retriever_, SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), @@ -1715,7 +1723,8 @@ TEST_F(SnippetRetrieverTest, SnippettingRfc822Ascii) { .SetCardinality(CARDINALITY_REPEATED))) .Build(); ICING_ASSERT_OK(schema_store_->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true)); + schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( snippet_retriever_, @@ -1790,7 +1799,8 @@ TEST_F(SnippetRetrieverTest, SnippettingRfc822CJK) { .SetCardinality(CARDINALITY_REPEATED))) .Build(); ICING_ASSERT_OK(schema_store_->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true)); + schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( snippet_retriever_, @@ -1835,13 +1845,11 @@ TEST_F(SnippetRetrieverTest, SnippettingRfc822CJK) { TEST_F(SnippetRetrieverTest, SnippettingUrlAscii) { SchemaProto schema = SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("urlType") - .AddProperty(PropertyConfigBuilder() - .SetName("url") - .SetDataTypeString(MATCH_PREFIX, - TOKENIZER_URL) - .SetCardinality(CARDINALITY_REPEATED))) + .AddType(SchemaTypeConfigBuilder().SetType("urlType").AddProperty( + PropertyConfigBuilder() + .SetName("url") + 
.SetDataTypeString(MATCH_PREFIX, TOKENIZER_URL) + .SetCardinality(CARDINALITY_REPEATED))) .Build(); ICING_ASSERT_OK(schema_store_->SetSchema( schema, /*ignore_errors_and_delete_documents=*/true)); diff --git a/icing/schema-builder.h b/icing/schema-builder.h index 1dceb62..9e384c5 100644 --- a/icing/schema-builder.h +++ b/icing/schema-builder.h @@ -158,8 +158,8 @@ class SchemaTypeConfigBuilder { return *this; } - SchemaTypeConfigBuilder& SetParentType(std::string_view parent_type) { - type_config_.set_parent_type(std::string(parent_type)); + SchemaTypeConfigBuilder& AddParentType(std::string_view parent_type) { + type_config_.add_parent_types(std::string(parent_type)); return *this; } diff --git a/icing/schema/backup-schema-producer.cc b/icing/schema/backup-schema-producer.cc new file mode 100644 index 0000000..d0a0554 --- /dev/null +++ b/icing/schema/backup-schema-producer.cc @@ -0,0 +1,164 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/schema/backup-schema-producer.h" + +#include <string_view> +#include <unordered_map> +#include <vector> + +#include "icing/proto/schema.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/schema/property-util.h" +#include "icing/schema/section.h" +#include "icing/util/status-macros.h" + +namespace icing { +namespace lib { + +namespace { + +// Creates a map of property to indexed id count based on the list of indexed +// properties provided by metadata_list. 
+// For all non-document properties, the value will always be 1. +// For document properties, the value will be the number of nested properties +// that are indexed with that document type. +std::unordered_map<std::string_view, int> CreateIndexedIdCountMap( + const std::vector<SectionMetadata>* metadata_list) { + std::unordered_map<std::string_view, int> property_indexed_id_count_map; + for (const SectionMetadata& metadata : *metadata_list) { + std::string_view top_level_property; + size_t separator_pos = + metadata.path.find(property_util::kPropertyPathSeparator); + if (separator_pos == std::string::npos) { + top_level_property = metadata.path; + } else { + top_level_property = + std::string_view(metadata.path.c_str(), separator_pos); + } + int& count = property_indexed_id_count_map[top_level_property]; + ++count; + } + return property_indexed_id_count_map; +} + +// Returns the indices (within schema.types()) of all types that are rollback +// incompatible (old code cannot handle these types if they are unmodified). +// +// Currently, this means types that: +// 1. Use RFC822 tokenization for any properties +// 2. 
Use more than 16 indexed properties +libtextclassifier3::StatusOr<std::vector<int>> +GetRollbackIncompatibleTypeIndices(const SchemaProto& schema, + const SectionManager& type_manager) { + std::vector<int> invalid_type_indices; + for (int i = 0; i < schema.types_size(); ++i) { + const SchemaTypeConfigProto& type = schema.types(i); + bool rollback_incompatible = false; + for (const PropertyConfigProto& property : type.properties()) { + if (property.string_indexing_config().tokenizer_type() == + StringIndexingConfig::TokenizerType::RFC822) { + rollback_incompatible = true; + break; + } + } + if (rollback_incompatible) { + invalid_type_indices.push_back(i); + continue; + } + + ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list, + type_manager.GetMetadataList(type.schema_type())); + if (metadata_list->size() > kOldTotalNumSections) { + invalid_type_indices.push_back(i); + } + } + return invalid_type_indices; +} + +} // namespace + +/* static */ libtextclassifier3::StatusOr<BackupSchemaProducer> +BackupSchemaProducer::Create(const SchemaProto& schema, + const SectionManager& type_manager) { + ICING_ASSIGN_OR_RETURN( + std::vector<int> invalid_type_indices, + GetRollbackIncompatibleTypeIndices(schema, type_manager)); + if (invalid_type_indices.empty()) { + return BackupSchemaProducer(); + } + + SchemaProto backup_schema(schema); + std::unordered_map<std::string_view, int> type_indexed_property_count; + for (int i : invalid_type_indices) { + SchemaTypeConfigProto* type = backup_schema.mutable_types(i); + + // This should never cause an error - every type should have an entry in the + // type_manager. 
+ ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list, + type_manager.GetMetadataList(type->schema_type())); + int num_indexed_sections = metadata_list->size(); + std::unordered_map<std::string_view, int> property_indexed_id_count_map; + if (num_indexed_sections > kOldTotalNumSections) { + property_indexed_id_count_map = CreateIndexedIdCountMap(metadata_list); + } + + // Step 1. Switch all properties with RFC tokenizer as unindexed. + for (PropertyConfigProto& property : *type->mutable_properties()) { + // If the property uses the RFC tokenizer, then we need to set it to NONE + // and set match type UNKNOWN. + if (property.string_indexing_config().tokenizer_type() == + StringIndexingConfig::TokenizerType::RFC822) { + property.clear_string_indexing_config(); + --num_indexed_sections; + property_indexed_id_count_map.erase(property.property_name()); + } + } + + // Step 2. If there are any types that exceed the old indexed property + // limit, then mark indexed properties as unindexed until we're back under + // the limit. + if (num_indexed_sections <= kOldTotalNumSections) { + continue; + } + + // We expect that the last properties were the ones added most recently and + // are the least crucial, so we do removal in reverse order. This is a bit + // arbitrary, but we don't really have sufficient information to make this + // judgment anyways. + for (auto itr = type->mutable_properties()->rbegin(); + itr != type->mutable_properties()->rend(); ++itr) { + auto indexed_count_itr = + property_indexed_id_count_map.find(itr->property_name()); + if (indexed_count_itr == property_indexed_id_count_map.end()) { + continue; + } + + // Mark this property as unindexed and subtract all indexed property ids + // consumed by this property. 
+ PropertyConfigProto& property = *itr; + property.clear_document_indexing_config(); + property.clear_string_indexing_config(); + property.clear_integer_indexing_config(); + num_indexed_sections -= indexed_count_itr->second; + if (num_indexed_sections <= kOldTotalNumSections) { + break; + } + } + } + return BackupSchemaProducer(std::move(backup_schema)); +} + +} // namespace lib +} // namespace icing diff --git a/icing/schema/backup-schema-producer.h b/icing/schema/backup-schema-producer.h new file mode 100644 index 0000000..61dcde6 --- /dev/null +++ b/icing/schema/backup-schema-producer.h @@ -0,0 +1,55 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_SCHEMA_BACKUP_SCHEMA_PRODUCER_H_ +#define ICING_SCHEMA_BACKUP_SCHEMA_PRODUCER_H_ + +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/proto/schema.pb.h" +#include "icing/schema/section-manager.h" +#include "icing/schema/section.h" + +namespace icing { +namespace lib { + +class BackupSchemaProducer { + public: + // Creates a BackupSchemaProducer based off of schema. + // If schema doesn't require a backup schema (because it is fully + // rollback-proof) then no copies will be made and `is_backup_necessary` will + // return false. + // If schema *does* require a backup schema, then `is_backup_necessary` will + // return true and the backup schema can be retrieved by calling `Produce`. 
+ // Returns: + // - On success, a BackupSchemaProducer + // - INTERNAL_ERROR if the schema is inconsistent with the type_manager. + static libtextclassifier3::StatusOr<BackupSchemaProducer> Create( + const SchemaProto& schema, const SectionManager& type_manager); + + SchemaProto Produce() && { return std::move(cached_schema_); } + + bool is_backup_necessary() const { return !cached_schema_.types().empty(); } + + private: + BackupSchemaProducer() = default; + explicit BackupSchemaProducer(SchemaProto&& schema) + : cached_schema_(std::move(schema)) {} + + SchemaProto cached_schema_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_SCHEMA_BACKUP_SCHEMA_PRODUCER_H_ diff --git a/icing/schema/backup-schema-producer_test.cc b/icing/schema/backup-schema-producer_test.cc new file mode 100644 index 0000000..424fec0 --- /dev/null +++ b/icing/schema/backup-schema-producer_test.cc @@ -0,0 +1,630 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/schema/backup-schema-producer.h" + +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/file/filesystem.h" +#include "icing/proto/schema.pb.h" +#include "icing/schema-builder.h" +#include "icing/schema/schema-type-manager.h" +#include "icing/schema/schema-util.h" +#include "icing/store/document-filter-data.h" +#include "icing/store/dynamic-trie-key-mapper.h" +#include "icing/store/key-mapper.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +using ::testing::Eq; + +class BackupSchemaProducerTest : public ::testing::Test { + protected: + void SetUp() override { + test_dir_ = GetTestTempDir() + "/icing"; + schema_store_dir_ = test_dir_ + "/schema_store"; + filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()); + } + + void TearDown() override { + ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str())); + } + + Filesystem filesystem_; + std::string test_dir_; + std::string schema_store_dir_; +}; + +TEST_F(BackupSchemaProducerTest, EmptySchema) { + SchemaProto empty; + SchemaUtil::TypeConfigMap type_config_map; + SchemaUtil::BuildTypeConfigMap(empty, &type_config_map); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper, + DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_, + /*maximum_size_bytes=*/10000)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaTypeManager> schema_type_manager, + SchemaTypeManager::Create(type_config_map, type_id_mapper.get())); + + ICING_ASSERT_OK_AND_ASSIGN( + BackupSchemaProducer backup_producer, + BackupSchemaProducer::Create(empty, + schema_type_manager->section_manager())); + EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(false)); +} + +TEST_F(BackupSchemaProducerTest, NoIndexedPropertySchema) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + 
.SetType("TypeA") + .AddProperty(PropertyConfigBuilder() + .SetName("prop1") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_STRING)) + .AddProperty(PropertyConfigBuilder() + .SetName("prop2") + .SetCardinality(CARDINALITY_REQUIRED) + .SetDataType(TYPE_INT64))) + .AddType(SchemaTypeConfigBuilder() + .SetType("TypeB") + .AddProperty( + PropertyConfigBuilder() + .SetName("prop3") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument( + "TypeA", /*index_nested_properties=*/false)) + .AddProperty(PropertyConfigBuilder() + .SetName("prop4") + .SetCardinality(CARDINALITY_REPEATED) + .SetDataType(TYPE_STRING))) + .Build(); + + SchemaUtil::TypeConfigMap type_config_map; + SchemaUtil::BuildTypeConfigMap(schema, &type_config_map); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper, + DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_, + /*maximum_size_bytes=*/10000)); + ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk()); + ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk()); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaTypeManager> schema_type_manager, + SchemaTypeManager::Create(type_config_map, type_id_mapper.get())); + + ICING_ASSERT_OK_AND_ASSIGN( + BackupSchemaProducer backup_producer, + BackupSchemaProducer::Create(schema, + schema_type_manager->section_manager())); + EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(false)); +} + +TEST_F(BackupSchemaProducerTest, RollbackCompatibleSchema) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("TypeA") + .AddProperty(PropertyConfigBuilder() + .SetName("prop1") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN)) + .AddProperty(PropertyConfigBuilder() + .SetName("prop2") + .SetCardinality(CARDINALITY_REQUIRED) + .SetDataTypeInt64(NUMERIC_MATCH_RANGE))) + .AddType(SchemaTypeConfigBuilder() + .SetType("TypeB") + .AddProperty( + 
PropertyConfigBuilder() + .SetName("prop3") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument( + "TypeA", /*index_nested_properties=*/true)) + .AddProperty(PropertyConfigBuilder() + .SetName("prop4") + .SetCardinality(CARDINALITY_REPEATED) + .SetDataTypeString(TERM_MATCH_EXACT, + TOKENIZER_VERBATIM))) + .Build(); + + SchemaUtil::TypeConfigMap type_config_map; + SchemaUtil::BuildTypeConfigMap(schema, &type_config_map); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper, + DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_, + /*maximum_size_bytes=*/10000)); + ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk()); + ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk()); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaTypeManager> schema_type_manager, + SchemaTypeManager::Create(type_config_map, type_id_mapper.get())); + + ICING_ASSERT_OK_AND_ASSIGN( + BackupSchemaProducer backup_producer, + BackupSchemaProducer::Create(schema, + schema_type_manager->section_manager())); + EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(false)); +} + +TEST_F(BackupSchemaProducerTest, RemoveRfc822) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty( + PropertyConfigBuilder() + .SetName("prop1") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822))) + .Build(); + + SchemaUtil::TypeConfigMap type_config_map; + SchemaUtil::BuildTypeConfigMap(schema, &type_config_map); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper, + DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_, + /*maximum_size_bytes=*/10000)); + ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk()); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaTypeManager> schema_type_manager, + SchemaTypeManager::Create(type_config_map, type_id_mapper.get())); + + 
ICING_ASSERT_OK_AND_ASSIGN( + BackupSchemaProducer backup_producer, + BackupSchemaProducer::Create(schema, + schema_type_manager->section_manager())); + EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true)); + SchemaProto backup = std::move(backup_producer).Produce(); + + SchemaProto expected_backup = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty( + PropertyConfigBuilder() + .SetName("prop1") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_STRING))) + .Build(); + EXPECT_THAT(backup, testing::EqualsProto(expected_backup)); +} + +TEST_F(BackupSchemaProducerTest, MakeExtraStringIndexedPropertiesUnindexed) { + PropertyConfigBuilder indexed_string_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN); + SchemaTypeConfigProto type = + SchemaTypeConfigBuilder() + .SetType("TypeA") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .AddProperty(indexed_string_property_builder.SetName("prop1")) + .AddProperty(indexed_string_property_builder.SetName("prop2")) + .AddProperty(indexed_string_property_builder.SetName("prop3")) + .AddProperty(indexed_string_property_builder.SetName("prop4")) + .AddProperty(indexed_string_property_builder.SetName("prop5")) + .AddProperty(indexed_string_property_builder.SetName("prop6")) + .AddProperty(indexed_string_property_builder.SetName("prop7")) + .AddProperty(indexed_string_property_builder.SetName("prop8")) + .AddProperty(indexed_string_property_builder.SetName("prop9")) + .AddProperty(indexed_string_property_builder.SetName("prop10")) + .AddProperty(indexed_string_property_builder.SetName("prop11")) + .AddProperty(indexed_string_property_builder.SetName("prop12")) + .AddProperty(indexed_string_property_builder.SetName("prop13")) + .AddProperty(indexed_string_property_builder.SetName("prop14")) + .AddProperty(indexed_string_property_builder.SetName("prop15")) + 
.AddProperty(indexed_string_property_builder.SetName("prop16")) + .AddProperty(indexed_string_property_builder.SetName("prop17")) + .AddProperty(indexed_string_property_builder.SetName("prop18")) + .AddProperty(indexed_string_property_builder.SetName("prop19")) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type).Build(); + + SchemaUtil::TypeConfigMap type_config_map; + SchemaUtil::BuildTypeConfigMap(schema, &type_config_map); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper, + DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_, + /*maximum_size_bytes=*/10000)); + ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk()); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaTypeManager> schema_type_manager, + SchemaTypeManager::Create(type_config_map, type_id_mapper.get())); + + ICING_ASSERT_OK_AND_ASSIGN( + BackupSchemaProducer backup_producer, + BackupSchemaProducer::Create(schema, + schema_type_manager->section_manager())); + EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true)); + SchemaProto backup = std::move(backup_producer).Produce(); + + PropertyConfigBuilder unindexed_string_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_STRING); + SchemaTypeConfigProto expected_type = + SchemaTypeConfigBuilder() + .SetType("TypeA") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .AddProperty(indexed_string_property_builder.SetName("prop1")) + .AddProperty(indexed_string_property_builder.SetName("prop2")) + .AddProperty(indexed_string_property_builder.SetName("prop3")) + .AddProperty(indexed_string_property_builder.SetName("prop4")) + .AddProperty(indexed_string_property_builder.SetName("prop5")) + .AddProperty(indexed_string_property_builder.SetName("prop6")) + .AddProperty(indexed_string_property_builder.SetName("prop7")) + .AddProperty(indexed_string_property_builder.SetName("prop8")) + 
.AddProperty(indexed_string_property_builder.SetName("prop9")) + .AddProperty(indexed_string_property_builder.SetName("prop10")) + .AddProperty(indexed_string_property_builder.SetName("prop11")) + .AddProperty(indexed_string_property_builder.SetName("prop12")) + .AddProperty(indexed_string_property_builder.SetName("prop13")) + .AddProperty(indexed_string_property_builder.SetName("prop14")) + .AddProperty(indexed_string_property_builder.SetName("prop15")) + .AddProperty(unindexed_string_property_builder.SetName("prop16")) + .AddProperty(unindexed_string_property_builder.SetName("prop17")) + .AddProperty(unindexed_string_property_builder.SetName("prop18")) + .AddProperty(unindexed_string_property_builder.SetName("prop19")) + .Build(); + SchemaProto expected_backup = SchemaBuilder().AddType(expected_type).Build(); + EXPECT_THAT(backup, testing::EqualsProto(expected_backup)); +} + +TEST_F(BackupSchemaProducerTest, MakeExtraIntIndexedPropertiesUnindexed) { + PropertyConfigBuilder indexed_int_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeInt64(NUMERIC_MATCH_RANGE); + SchemaTypeConfigProto type = + SchemaTypeConfigBuilder() + .SetType("TypeA") + .AddProperty(indexed_int_property_builder.SetName("prop0")) + .AddProperty(indexed_int_property_builder.SetName("prop1")) + .AddProperty(indexed_int_property_builder.SetName("prop2")) + .AddProperty(indexed_int_property_builder.SetName("prop3")) + .AddProperty(indexed_int_property_builder.SetName("prop4")) + .AddProperty(indexed_int_property_builder.SetName("prop5")) + .AddProperty(indexed_int_property_builder.SetName("prop6")) + .AddProperty(indexed_int_property_builder.SetName("prop7")) + .AddProperty(indexed_int_property_builder.SetName("prop8")) + .AddProperty(indexed_int_property_builder.SetName("prop9")) + .AddProperty(indexed_int_property_builder.SetName("prop10")) + .AddProperty(indexed_int_property_builder.SetName("prop11")) + 
.AddProperty(indexed_int_property_builder.SetName("prop12")) + .AddProperty(indexed_int_property_builder.SetName("prop13")) + .AddProperty(indexed_int_property_builder.SetName("prop14")) + .AddProperty(indexed_int_property_builder.SetName("prop15")) + .AddProperty(indexed_int_property_builder.SetName("prop16")) + .AddProperty(indexed_int_property_builder.SetName("prop17")) + .AddProperty(indexed_int_property_builder.SetName("prop18")) + .AddProperty(indexed_int_property_builder.SetName("prop19")) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type).Build(); + + SchemaUtil::TypeConfigMap type_config_map; + SchemaUtil::BuildTypeConfigMap(schema, &type_config_map); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper, + DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_, + /*maximum_size_bytes=*/10000)); + ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk()); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaTypeManager> schema_type_manager, + SchemaTypeManager::Create(type_config_map, type_id_mapper.get())); + + ICING_ASSERT_OK_AND_ASSIGN( + BackupSchemaProducer backup_producer, + BackupSchemaProducer::Create(schema, + schema_type_manager->section_manager())); + EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true)); + SchemaProto backup = std::move(backup_producer).Produce(); + + PropertyConfigBuilder unindexed_int_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_INT64); + SchemaTypeConfigProto expected_type = + SchemaTypeConfigBuilder() + .SetType("TypeA") + .AddProperty(indexed_int_property_builder.SetName("prop0")) + .AddProperty(indexed_int_property_builder.SetName("prop1")) + .AddProperty(indexed_int_property_builder.SetName("prop2")) + .AddProperty(indexed_int_property_builder.SetName("prop3")) + .AddProperty(indexed_int_property_builder.SetName("prop4")) + 
.AddProperty(indexed_int_property_builder.SetName("prop5")) + .AddProperty(indexed_int_property_builder.SetName("prop6")) + .AddProperty(indexed_int_property_builder.SetName("prop7")) + .AddProperty(indexed_int_property_builder.SetName("prop8")) + .AddProperty(indexed_int_property_builder.SetName("prop9")) + .AddProperty(indexed_int_property_builder.SetName("prop10")) + .AddProperty(indexed_int_property_builder.SetName("prop11")) + .AddProperty(indexed_int_property_builder.SetName("prop12")) + .AddProperty(indexed_int_property_builder.SetName("prop13")) + .AddProperty(indexed_int_property_builder.SetName("prop14")) + .AddProperty(indexed_int_property_builder.SetName("prop15")) + .AddProperty(unindexed_int_property_builder.SetName("prop16")) + .AddProperty(unindexed_int_property_builder.SetName("prop17")) + .AddProperty(unindexed_int_property_builder.SetName("prop18")) + .AddProperty(unindexed_int_property_builder.SetName("prop19")) + .Build(); + SchemaProto expected_backup = SchemaBuilder().AddType(expected_type).Build(); + EXPECT_THAT(backup, testing::EqualsProto(expected_backup)); +} + +TEST_F(BackupSchemaProducerTest, MakeExtraDocumentIndexedPropertiesUnindexed) { + PropertyConfigBuilder indexed_string_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN); + SchemaTypeConfigProto typeB = + SchemaTypeConfigBuilder() + .SetType("TypeB") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .AddProperty(indexed_string_property_builder.SetName("prop1")) + .AddProperty(indexed_string_property_builder.SetName("prop2")) + .AddProperty(indexed_string_property_builder.SetName("prop3")) + .AddProperty(indexed_string_property_builder.SetName("prop4")) + .AddProperty(indexed_string_property_builder.SetName("prop5")) + .AddProperty(indexed_string_property_builder.SetName("prop6")) + .AddProperty(indexed_string_property_builder.SetName("prop7")) + 
.AddProperty(indexed_string_property_builder.SetName("prop8")) + .AddProperty(indexed_string_property_builder.SetName("prop9")) + .Build(); + + PropertyConfigBuilder indexed_document_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("TypeB", /*index_nested_properties=*/true); + SchemaTypeConfigProto typeA = + SchemaTypeConfigBuilder() + .SetType("TypeA") + .AddProperty(indexed_document_property_builder.SetName("propA")) + .AddProperty(indexed_document_property_builder.SetName("propB")) + .Build(); + + SchemaProto schema = SchemaBuilder().AddType(typeA).AddType(typeB).Build(); + + SchemaUtil::TypeConfigMap type_config_map; + SchemaUtil::BuildTypeConfigMap(schema, &type_config_map); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper, + DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_, + /*maximum_size_bytes=*/10000)); + ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk()); + ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk()); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaTypeManager> schema_type_manager, + SchemaTypeManager::Create(type_config_map, type_id_mapper.get())); + + ICING_ASSERT_OK_AND_ASSIGN( + BackupSchemaProducer backup_producer, + BackupSchemaProducer::Create(schema, + schema_type_manager->section_manager())); + EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true)); + SchemaProto backup = std::move(backup_producer).Produce(); + + PropertyConfigProto unindexed_document_property = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_DOCUMENT) + .Build(); + unindexed_document_property.set_schema_type("TypeB"); + PropertyConfigBuilder unindexed_document_property_builder( + unindexed_document_property); + SchemaTypeConfigProto expected_typeA = + SchemaTypeConfigBuilder() + .SetType("TypeA") + .AddProperty(indexed_document_property_builder.SetName("propA")) + 
.AddProperty(unindexed_document_property_builder.SetName("propB")) + .Build(); + SchemaProto expected_backup = + SchemaBuilder().AddType(expected_typeA).AddType(typeB).Build(); + EXPECT_THAT(backup, testing::EqualsProto(expected_backup)); +} + +TEST_F(BackupSchemaProducerTest, MakeRfcPropertiesUnindexedFirst) { + PropertyConfigBuilder indexed_string_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN); + SchemaTypeConfigProto typeA = + SchemaTypeConfigBuilder() + .SetType("TypeA") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .AddProperty(indexed_string_property_builder.SetName("prop1")) + .AddProperty(indexed_string_property_builder.SetName("prop2")) + .AddProperty(indexed_string_property_builder.SetName("prop3")) + .AddProperty(indexed_string_property_builder.SetName("prop4")) + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)) + .AddProperty(indexed_string_property_builder.SetName("prop6")) + .AddProperty(indexed_string_property_builder.SetName("prop7")) + .AddProperty(indexed_string_property_builder.SetName("prop8")) + .AddProperty(indexed_string_property_builder.SetName("prop9")) + .AddProperty(indexed_string_property_builder.SetName("prop10")) + .AddProperty(indexed_string_property_builder.SetName("prop11")) + .AddProperty(indexed_string_property_builder.SetName("prop12")) + .AddProperty(indexed_string_property_builder.SetName("prop13")) + .AddProperty(indexed_string_property_builder.SetName("prop14")) + .AddProperty(indexed_string_property_builder.SetName("prop15")) + .AddProperty(indexed_string_property_builder.SetName("prop16")) + .Build(); + + SchemaProto schema = SchemaBuilder().AddType(typeA).Build(); + + SchemaUtil::TypeConfigMap type_config_map; + SchemaUtil::BuildTypeConfigMap(schema, &type_config_map); + ICING_ASSERT_OK_AND_ASSIGN( 
+ std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper, + DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_, + /*maximum_size_bytes=*/10000)); + ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk()); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaTypeManager> schema_type_manager, + SchemaTypeManager::Create(type_config_map, type_id_mapper.get())); + + ICING_ASSERT_OK_AND_ASSIGN( + BackupSchemaProducer backup_producer, + BackupSchemaProducer::Create(schema, + schema_type_manager->section_manager())); + EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true)); + SchemaProto backup = std::move(backup_producer).Produce(); + + SchemaTypeConfigProto expected_typeA = + SchemaTypeConfigBuilder() + .SetType("TypeA") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .AddProperty(indexed_string_property_builder.SetName("prop1")) + .AddProperty(indexed_string_property_builder.SetName("prop2")) + .AddProperty(indexed_string_property_builder.SetName("prop3")) + .AddProperty(indexed_string_property_builder.SetName("prop4")) + .AddProperty(PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_STRING)) + .AddProperty(indexed_string_property_builder.SetName("prop6")) + .AddProperty(indexed_string_property_builder.SetName("prop7")) + .AddProperty(indexed_string_property_builder.SetName("prop8")) + .AddProperty(indexed_string_property_builder.SetName("prop9")) + .AddProperty(indexed_string_property_builder.SetName("prop10")) + .AddProperty(indexed_string_property_builder.SetName("prop11")) + .AddProperty(indexed_string_property_builder.SetName("prop12")) + .AddProperty(indexed_string_property_builder.SetName("prop13")) + .AddProperty(indexed_string_property_builder.SetName("prop14")) + .AddProperty(indexed_string_property_builder.SetName("prop15")) + .AddProperty(indexed_string_property_builder.SetName("prop16")) + .Build(); + SchemaProto expected_backup = 
SchemaBuilder().AddType(expected_typeA).Build(); + EXPECT_THAT(backup, testing::EqualsProto(expected_backup)); +} + +TEST_F(BackupSchemaProducerTest, MakeExtraPropertiesUnindexedMultipleTypes) { + PropertyConfigBuilder indexed_string_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN); + PropertyConfigBuilder indexed_int_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeInt64(NUMERIC_MATCH_RANGE); + SchemaTypeConfigProto typeB = + SchemaTypeConfigBuilder() + .SetType("TypeB") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .AddProperty(indexed_int_property_builder.SetName("prop1")) + .AddProperty(indexed_string_property_builder.SetName("prop2")) + .AddProperty(indexed_int_property_builder.SetName("prop3")) + .AddProperty(indexed_string_property_builder.SetName("prop4")) + .AddProperty(indexed_int_property_builder.SetName("prop5")) + .AddProperty(indexed_string_property_builder.SetName("prop6")) + .AddProperty(indexed_int_property_builder.SetName("prop7")) + .AddProperty(indexed_string_property_builder.SetName("prop8")) + .AddProperty(indexed_int_property_builder.SetName("prop9")) + .Build(); + + PropertyConfigBuilder indexed_document_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("TypeB", /*index_nested_properties=*/true); + SchemaTypeConfigProto typeA = + SchemaTypeConfigBuilder() + .SetType("TypeA") + .AddProperty(indexed_string_property_builder.SetName("propA")) + .AddProperty(indexed_int_property_builder.SetName("propB")) + .AddProperty(indexed_string_property_builder.SetName("propC")) + .AddProperty(indexed_int_property_builder.SetName("propD")) + .AddProperty(indexed_string_property_builder.SetName("propE")) + .AddProperty(indexed_int_property_builder.SetName("propF")) + .AddProperty(indexed_string_property_builder.SetName("propG")) + 
.AddProperty(indexed_int_property_builder.SetName("propH")) + .AddProperty(indexed_document_property_builder.SetName("propI")) + .AddProperty(indexed_string_property_builder.SetName("propJ")) + .AddProperty(indexed_int_property_builder.SetName("propK")) + .Build(); + + SchemaProto schema = SchemaBuilder().AddType(typeA).AddType(typeB).Build(); + + SchemaUtil::TypeConfigMap type_config_map; + SchemaUtil::BuildTypeConfigMap(schema, &type_config_map); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DynamicTrieKeyMapper<SchemaTypeId>> type_id_mapper, + DynamicTrieKeyMapper<SchemaTypeId>::Create(filesystem_, schema_store_dir_, + /*maximum_size_bytes=*/10000)); + ASSERT_THAT(type_id_mapper->Put("TypeA", 0), IsOk()); + ASSERT_THAT(type_id_mapper->Put("TypeB", 1), IsOk()); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaTypeManager> schema_type_manager, + SchemaTypeManager::Create(type_config_map, type_id_mapper.get())); + + ICING_ASSERT_OK_AND_ASSIGN( + BackupSchemaProducer backup_producer, + BackupSchemaProducer::Create(schema, + schema_type_manager->section_manager())); + EXPECT_THAT(backup_producer.is_backup_necessary(), Eq(true)); + SchemaProto backup = std::move(backup_producer).Produce(); + + PropertyConfigBuilder unindexed_string_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_STRING); + PropertyConfigBuilder unindexed_int_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_INT64); + PropertyConfigProto unindexed_document_property = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_DOCUMENT) + .Build(); + unindexed_document_property.set_schema_type("TypeB"); + PropertyConfigBuilder unindexed_document_property_builder( + unindexed_document_property); + + SchemaTypeConfigProto expected_typeA = + SchemaTypeConfigBuilder() + .SetType("TypeA") + .AddProperty(indexed_string_property_builder.SetName("propA")) + 
.AddProperty(indexed_int_property_builder.SetName("propB")) + .AddProperty(indexed_string_property_builder.SetName("propC")) + .AddProperty(indexed_int_property_builder.SetName("propD")) + .AddProperty(indexed_string_property_builder.SetName("propE")) + .AddProperty(indexed_int_property_builder.SetName("propF")) + .AddProperty(indexed_string_property_builder.SetName("propG")) + .AddProperty(indexed_int_property_builder.SetName("propH")) + .AddProperty(unindexed_document_property_builder.SetName("propI")) + .AddProperty(unindexed_string_property_builder.SetName("propJ")) + .AddProperty(unindexed_int_property_builder.SetName("propK")) + .Build(); + SchemaProto expected_backup = + SchemaBuilder().AddType(expected_typeA).AddType(typeB).Build(); + EXPECT_THAT(backup, testing::EqualsProto(expected_backup)); +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/schema/schema-property-iterator.cc b/icing/schema/schema-property-iterator.cc index 455b61b..e1078c2 100644 --- a/icing/schema/schema-property-iterator.cc +++ b/icing/schema/schema-property-iterator.cc @@ -58,9 +58,13 @@ libtextclassifier3::Status SchemaPropertyIterator::Advance() { if (parent_type_config_names_.count( nested_type_config_iter->second.schema_type()) > 0) { - // Cycle detected. Abort the iteration. - return absl_ports::InvalidArgumentError( - "Detect nested schema cycle dependency"); + // Cycle detected. The schema definition is guaranteed to be valid here + // since it must have already been validated during SchemaUtil::Validate, + // which would have rejected any schema with bad cycles. + // + // We do not need to iterate this type further so we simply move on to + // other properties in the parent type. 
+ continue; } std::string curr_property_path = levels_.back().GetCurrentPropertyPath(); diff --git a/icing/schema/schema-property-iterator.h b/icing/schema/schema-property-iterator.h index 696dc72..f60a56e 100644 --- a/icing/schema/schema-property-iterator.h +++ b/icing/schema/schema-property-iterator.h @@ -33,6 +33,9 @@ namespace lib { // (non-document-type) properties will be returned, and for document type // properties, the iterator will traverse down to the next nested level of // schema. +// +// REQUIRED: The schema in which this SchemaTypeConfigProto is defined must have +// already passed the validation step during SetSchema. class SchemaPropertyIterator { public: explicit SchemaPropertyIterator( diff --git a/icing/schema/schema-property-iterator_test.cc b/icing/schema/schema-property-iterator_test.cc index e14eabb..080d574 100644 --- a/icing/schema/schema-property-iterator_test.cc +++ b/icing/schema/schema-property-iterator_test.cc @@ -15,13 +15,10 @@ #include "icing/schema/schema-property-iterator.h" #include <string> -#include <utility> -#include <vector> #include "icing/text_classifier/lib3/utils/base/status.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "icing/portable/equals-proto.h" #include "icing/proto/schema.pb.h" #include "icing/schema-builder.h" #include "icing/schema/schema-util.h" @@ -239,53 +236,6 @@ TEST(SchemaPropertyIteratorTest, StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); } -TEST(SchemaPropertyIteratorTest, - SchemaTypeConfigWithCycleDependencyShouldGetInvalidArgumentError) { - std::string schema_type_name1 = "SchemaOne"; - std::string schema_type_name2 = "SchemaTwo"; - - SchemaTypeConfigProto schema_type_config1 = - SchemaTypeConfigBuilder() - .SetType(schema_type_name1) - .AddProperty( - PropertyConfigBuilder().SetName("Foo").SetDataTypeDocument( - schema_type_name2, /*index_nested_properties=*/true)) - .Build(); - SchemaTypeConfigProto schema_type_config2 = - SchemaTypeConfigBuilder() - 
.SetType(schema_type_name2) - .AddProperty( - PropertyConfigBuilder().SetName("Bar").SetDataTypeDocument( - schema_type_name1, /*index_nested_properties=*/true)) - .Build(); - SchemaUtil::TypeConfigMap type_config_map = { - {schema_type_name1, schema_type_config1}, - {schema_type_name2, schema_type_config2}}; - - SchemaPropertyIterator iterator(schema_type_config1, type_config_map); - EXPECT_THAT(iterator.Advance(), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); -} - -TEST(SchemaPropertyIteratorTest, - SchemaTypeConfigWithSelfDependencyShouldGetInvalidArgumentError) { - std::string schema_type_name = "SchemaOne"; - - SchemaTypeConfigProto schema_type_config = - SchemaTypeConfigBuilder() - .SetType(schema_type_name) - .AddProperty( - PropertyConfigBuilder().SetName("Foo").SetDataTypeDocument( - schema_type_name, /*index_nested_properties=*/true)) - .Build(); - SchemaUtil::TypeConfigMap type_config_map = { - {schema_type_name, schema_type_config}}; - - SchemaPropertyIterator iterator(schema_type_config, type_config_map); - EXPECT_THAT(iterator.Advance(), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); -} - TEST(SchemaPropertyIteratorTest, NestedIndexable) { std::string schema_type_name1 = "SchemaOne"; std::string schema_type_name2 = "SchemaTwo"; @@ -464,6 +414,435 @@ TEST(SchemaPropertyIteratorTest, NestedIndexable) { StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); } +TEST(SchemaPropertyIteratorTest, SingleLevelCycle) { + std::string schema_a = "A"; + std::string schema_b = "B"; + + // Create schema with A -> B -> B -> B... 
+ SchemaTypeConfigProto schema_type_config_a = + SchemaTypeConfigBuilder() + .SetType(schema_a) + .AddProperty(PropertyConfigBuilder() + .SetName("schemaAprop1") + .SetDataTypeDocument( + schema_b, /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("schemaAprop2") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + SchemaTypeConfigProto schema_type_config_b = + SchemaTypeConfigBuilder() + .SetType(schema_b) + .AddProperty(PropertyConfigBuilder() + .SetName("schemaBprop1") + .SetDataTypeDocument( + schema_b, /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("schemaBprop2") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + + SchemaUtil::TypeConfigMap type_config_map = { + {schema_a, schema_type_config_a}, {schema_b, schema_type_config_b}}; + + // Order of iteration for schema A: + // {"schemaAprop1.schemaBprop2", "schemaAprop2"}, both indexable + SchemaPropertyIterator schema_a_iterator(schema_type_config_a, + type_config_map); + + EXPECT_THAT(schema_a_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), + Eq("schemaAprop1.schemaBprop2")); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_b.properties(1))); + EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_a_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2")); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_a.properties(1))); + EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_a_iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + // Order of iteration for schema B: + // {"schemaBprop2"}, indexable. 
+ SchemaPropertyIterator schema_b_iterator(schema_type_config_b, + type_config_map); + + EXPECT_THAT(schema_b_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2")); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_b.properties(1))); + EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_b_iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); +} + +TEST(SchemaPropertyIteratorTest, MultipleLevelCycle) { + std::string schema_a = "A"; + std::string schema_b = "B"; + std::string schema_c = "C"; + + // Create schema with A -> B -> C -> A -> B -> C... + SchemaTypeConfigProto schema_type_config_a = + SchemaTypeConfigBuilder() + .SetType(schema_a) + .AddProperty(PropertyConfigBuilder() + .SetName("schemaAprop1") + .SetDataTypeDocument( + schema_b, /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("schemaAprop2") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + SchemaTypeConfigProto schema_type_config_b = + SchemaTypeConfigBuilder() + .SetType(schema_b) + .AddProperty(PropertyConfigBuilder() + .SetName("schemaBprop1") + .SetDataTypeDocument( + schema_c, /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("schemaBprop2") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + SchemaTypeConfigProto schema_type_config_c = + SchemaTypeConfigBuilder() + .SetType(schema_c) + .AddProperty(PropertyConfigBuilder() + .SetName("schemaCprop1") + .SetDataTypeDocument( + schema_a, /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("schemaCprop2") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + + SchemaUtil::TypeConfigMap type_config_map = { + {schema_a, schema_type_config_a}, + {schema_b, schema_type_config_b}, + {schema_c, schema_type_config_c}}; + + // Order 
of iteration for schema A: + // {"schemaAprop1.schemaBprop1.schemaCprop2", "schemaAprop1.schemaBprop2", + // "schemaAprop2"}, all indexable + SchemaPropertyIterator schema_a_iterator(schema_type_config_a, + type_config_map); + + EXPECT_THAT(schema_a_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), + Eq("schemaAprop1.schemaBprop1.schemaCprop2")); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_c.properties(1))); + EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_a_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), + Eq("schemaAprop1.schemaBprop2")); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_b.properties(1))); + EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_a_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2")); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_a.properties(1))); + EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_a_iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + // Order of iteration for schema B: + // {"schemaBprop1.schemaCprop1.schemaAprop2", "schemaBprop1.schemaCprop2", + // "schemaBprop2"} + // + // Indexable properties: {"schemaBprop1.schemaCprop2", "schemaBprop2"} + SchemaPropertyIterator schema_b_iterator(schema_type_config_b, + type_config_map); + + EXPECT_THAT(schema_b_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), + Eq("schemaBprop1.schemaCprop1.schemaAprop2")); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_a.properties(1))); + EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsFalse()); + + 
EXPECT_THAT(schema_b_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), + Eq("schemaBprop1.schemaCprop2")); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_c.properties(1))); + EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_b_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2")); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_b.properties(1))); + EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_b_iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + // Order of iteration for schema C: + // {"schemaCprop1.schemaAprop1.schemaBprop2", "schemaCprop1.schemaAprop2", + // "schemaCprop2"} + // + // Indexable properties: {"schemaCprop2"} + SchemaPropertyIterator schema_c_iterator(schema_type_config_c, + type_config_map); + + EXPECT_THAT(schema_c_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), + Eq("schemaCprop1.schemaAprop1.schemaBprop2")); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_b.properties(1))); + EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsFalse()); + + EXPECT_THAT(schema_c_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), + Eq("schemaCprop1.schemaAprop2")); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_a.properties(1))); + EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsFalse()); + + EXPECT_THAT(schema_c_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2")); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_c.properties(1))); + 
EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_c_iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); +} + +TEST(SchemaPropertyIteratorTest, MultipleCycles) { + std::string schema_a = "A"; + std::string schema_b = "B"; + std::string schema_c = "C"; + std::string schema_d = "D"; + + // Create schema with D <-> A -> B -> C -> A -> B -> C -> A... + // Schema type A has two cycles: A-B-C-A and A-D-A + SchemaTypeConfigProto schema_type_config_a = + SchemaTypeConfigBuilder() + .SetType(schema_a) + .AddProperty(PropertyConfigBuilder() + .SetName("schemaAprop1") + .SetDataTypeDocument( + schema_b, /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("schemaAprop2") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .AddProperty(PropertyConfigBuilder() + .SetName("schemaAprop3") + .SetDataTypeDocument( + schema_d, /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto schema_type_config_b = + SchemaTypeConfigBuilder() + .SetType(schema_b) + .AddProperty(PropertyConfigBuilder() + .SetName("schemaBprop1") + .SetDataTypeDocument( + schema_c, /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("schemaBprop2") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + SchemaTypeConfigProto schema_type_config_c = + SchemaTypeConfigBuilder() + .SetType(schema_c) + .AddProperty(PropertyConfigBuilder() + .SetName("schemaCprop1") + .SetDataTypeDocument( + schema_a, /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("schemaCprop2") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + SchemaTypeConfigProto schema_type_config_d = + SchemaTypeConfigBuilder() + .SetType(schema_d) + .AddProperty(PropertyConfigBuilder() + .SetName("schemaDprop1") + .SetDataTypeDocument( + schema_a, /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + 
.SetName("schemaDprop2") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + + SchemaUtil::TypeConfigMap type_config_map = { + {schema_a, schema_type_config_a}, + {schema_b, schema_type_config_b}, + {schema_c, schema_type_config_c}, + {schema_d, schema_type_config_d}}; + + // Order of iteration for schema A: + // {"schemaAprop1.schemaBprop1.schemaCprop2", "schemaAprop1.schemaBprop2", + // "schemaAprop2", "schemaAprop3.schemaDprop2"}, all indexable + SchemaPropertyIterator schema_a_iterator(schema_type_config_a, + type_config_map); + + EXPECT_THAT(schema_a_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), + Eq("schemaAprop1.schemaBprop1.schemaCprop2")); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_c.properties(1))); + EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_a_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), + Eq("schemaAprop1.schemaBprop2")); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_b.properties(1))); + EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_a_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), Eq("schemaAprop2")); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_a.properties(1))); + EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_a_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyPath(), + Eq("schemaAprop3.schemaDprop2")); + EXPECT_THAT(schema_a_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_d.properties(1))); + EXPECT_THAT(schema_a_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_a_iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + // Order 
of iteration for schema B: + // {"schemaBprop1.schemaCprop1.schemaAprop2", + // "schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2", + // "schemaBprop1.schemaCprop2", "schemaBprop2"} + // + // Indexable properties: {"schemaBprop1.schemaCprop2", "schemaBprop2"} + SchemaPropertyIterator schema_b_iterator(schema_type_config_b, + type_config_map); + + EXPECT_THAT(schema_b_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), + Eq("schemaBprop1.schemaCprop1.schemaAprop2")); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_a.properties(1))); + EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsFalse()); + + EXPECT_THAT(schema_b_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), + Eq("schemaBprop1.schemaCprop1.schemaAprop3.schemaDprop2")); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_d.properties(1))); + EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsFalse()); + + EXPECT_THAT(schema_b_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), + Eq("schemaBprop1.schemaCprop2")); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_c.properties(1))); + EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_b_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyPath(), Eq("schemaBprop2")); + EXPECT_THAT(schema_b_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_b.properties(1))); + EXPECT_THAT(schema_b_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_b_iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + // Order of iteration for schema C: + // {"schemaCprop1.schemaAprop1.schemaBprop2", "schemaCprop1.schemaAprop2", + // "schemaCprop1.schemaAprop3.schemaDprop2", "schemaCprop2"} + // + // 
Indexable properties: {"schemaCprop2"} + SchemaPropertyIterator schema_c_iterator(schema_type_config_c, + type_config_map); + + EXPECT_THAT(schema_c_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), + Eq("schemaCprop1.schemaAprop1.schemaBprop2")); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_b.properties(1))); + EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsFalse()); + + EXPECT_THAT(schema_c_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), + Eq("schemaCprop1.schemaAprop2")); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_a.properties(1))); + EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsFalse()); + + EXPECT_THAT(schema_c_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), + Eq("schemaCprop1.schemaAprop3.schemaDprop2")); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_d.properties(1))); + EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsFalse()); + + EXPECT_THAT(schema_c_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyPath(), Eq("schemaCprop2")); + EXPECT_THAT(schema_c_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_c.properties(1))); + EXPECT_THAT(schema_c_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_c_iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + // Order of iteration for schema D: + // {"schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2", + // "schemaDprop1.schemaAprop1.schemaBprop2", "schemaDprop1.schemaAprop2", + // "schemaDprop2"} + // + // Indexable properties: {"schemaDprop2"} + SchemaPropertyIterator schema_d_iterator(schema_type_config_d, + type_config_map); + + EXPECT_THAT(schema_d_iterator.Advance(), IsOk()); + 
EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(), + Eq("schemaDprop1.schemaAprop1.schemaBprop1.schemaCprop2")); + EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_c.properties(1))); + EXPECT_THAT(schema_d_iterator.GetCurrentNestedIndexable(), IsFalse()); + + EXPECT_THAT(schema_d_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(), + Eq("schemaDprop1.schemaAprop1.schemaBprop2")); + EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_b.properties(1))); + EXPECT_THAT(schema_d_iterator.GetCurrentNestedIndexable(), IsFalse()); + + EXPECT_THAT(schema_d_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(), + Eq("schemaDprop1.schemaAprop2")); + EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_a.properties(1))); + EXPECT_THAT(schema_d_iterator.GetCurrentNestedIndexable(), IsFalse()); + + EXPECT_THAT(schema_d_iterator.Advance(), IsOk()); + EXPECT_THAT(schema_d_iterator.GetCurrentPropertyPath(), Eq("schemaDprop2")); + EXPECT_THAT(schema_d_iterator.GetCurrentPropertyConfig(), + EqualsProto(schema_type_config_d.properties(1))); + EXPECT_THAT(schema_d_iterator.GetCurrentNestedIndexable(), IsTrue()); + + EXPECT_THAT(schema_d_iterator.Advance(), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); +} + } // namespace } // namespace lib diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc index 065157e..bcc7c2c 100644 --- a/icing/schema/schema-store.cc +++ b/icing/schema/schema-store.cc @@ -15,11 +15,14 @@ #include "icing/schema/schema-store.h" #include <algorithm> +#include <cinttypes> #include <cstdint> +#include <limits> #include <memory> #include <string> #include <string_view> #include <unordered_map> +#include <unordered_set> #include <utility> #include <vector> @@ -30,12 +33,16 @@ #include "icing/file/destructible-directory.h" #include 
"icing/file/file-backed-proto.h" #include "icing/file/filesystem.h" +#include "icing/file/version-util.h" #include "icing/proto/debug.pb.h" #include "icing/proto/document.pb.h" #include "icing/proto/logging.pb.h" #include "icing/proto/schema.pb.h" +#include "icing/proto/search.pb.h" #include "icing/proto/storage.pb.h" +#include "icing/schema/backup-schema-producer.h" #include "icing/schema/joinable-property.h" +#include "icing/schema/property-util.h" #include "icing/schema/schema-type-manager.h" #include "icing/schema/schema-util.h" #include "icing/schema/section.h" @@ -52,6 +59,7 @@ namespace { constexpr char kSchemaStoreHeaderFilename[] = "schema_store_header"; constexpr char kSchemaFilename[] = "schema.pb"; +constexpr char kOverlaySchemaFilename[] = "overlay_schema.pb"; constexpr char kSchemaTypeMapperFilename[] = "schema_type_mapper"; // A DynamicTrieKeyMapper stores its data across 3 arrays internally. Giving @@ -59,15 +67,19 @@ constexpr char kSchemaTypeMapperFilename[] = "schema_type_mapper"; // 384KiB. 
constexpr int32_t kSchemaTypeMapperMaxSize = 3 * 128 * 1024; // 384 KiB -const std::string MakeHeaderFilename(const std::string& base_dir) { +std::string MakeHeaderFilename(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kSchemaStoreHeaderFilename); } -const std::string MakeSchemaFilename(const std::string& base_dir) { +std::string MakeSchemaFilename(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kSchemaFilename); } -const std::string MakeSchemaTypeMapperFilename(const std::string& base_dir) { +std::string MakeOverlaySchemaFilename(const std::string& base_dir) { + return absl_ports::StrCat(base_dir, "/", kOverlaySchemaFilename); +} + +std::string MakeSchemaTypeMapperFilename(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kSchemaTypeMapperFilename); } @@ -108,6 +120,61 @@ std::unordered_set<SchemaTypeId> SchemaTypeIdsChanged( } // namespace +/* static */ libtextclassifier3::StatusOr<SchemaStore::Header> +SchemaStore::Header::Read(const Filesystem* filesystem, + const std::string& path) { + Header header; + ScopedFd sfd(filesystem->OpenForRead(path.c_str())); + if (!sfd.is_valid()) { + return absl_ports::NotFoundError("SchemaStore header doesn't exist"); + } + + // If file is sizeof(LegacyHeader), then it must be LegacyHeader. 
+ int64_t file_size = filesystem->GetFileSize(sfd.get()); + if (file_size == sizeof(LegacyHeader)) { + LegacyHeader legacy_header; + if (!filesystem->Read(path.c_str(), &legacy_header, + sizeof(legacy_header))) { + return absl_ports::InternalError( + absl_ports::StrCat("Couldn't read: ", path)); + } + if (legacy_header.magic != Header::kMagic) { + return absl_ports::InternalError( + absl_ports::StrCat("Invalid header kMagic for file: ", path)); + } + header.set_checksum(legacy_header.checksum); + } else if (file_size == sizeof(Header)) { + if (!filesystem->Read(path.c_str(), &header, sizeof(header))) { + return absl_ports::InternalError( + absl_ports::StrCat("Couldn't read: ", path)); + } + if (header.magic() != Header::kMagic) { + return absl_ports::InternalError( + absl_ports::StrCat("Invalid header kMagic for file: ", path)); + } + } else { + int legacy_header_size = sizeof(LegacyHeader); + int header_size = sizeof(Header); + return absl_ports::InternalError(IcingStringUtil::StringPrintf( + "Unexpected header size %" PRId64 ". Expected %d or %d", file_size, + legacy_header_size, header_size)); + } + return header; +} + +libtextclassifier3::Status SchemaStore::Header::Write( + const Filesystem* filesystem, const std::string& path) { + ScopedFd scoped_fd(filesystem->OpenForWrite(path.c_str())); + // This should overwrite the header. 
+ if (!scoped_fd.is_valid() || + !filesystem->Write(scoped_fd.get(), this, sizeof(*this)) || + !filesystem->DataSync(scoped_fd.get())) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to write SchemaStore header: ", path)); + } + return libtextclassifier3::Status::OK; +} + libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create( const Filesystem* filesystem, const std::string& base_dir, const Clock* clock, InitializeStatsProto* initialize_stats) { @@ -140,6 +207,106 @@ libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create( return schema_store; } +/* static */ libtextclassifier3::Status SchemaStore::DiscardOverlaySchema( + const Filesystem* filesystem, const std::string& base_dir, Header& header) { + std::string header_filename = MakeHeaderFilename(base_dir); + if (header.overlay_created()) { + header.SetOverlayInfo( + /*overlay_created=*/false, + /*min_overlay_version_compatibility=*/ std::numeric_limits< + int32_t>::max()); + ICING_RETURN_IF_ERROR(header.Write(filesystem, header_filename)); + } + std::string schema_overlay_filename = MakeOverlaySchemaFilename(base_dir); + if (!filesystem->DeleteFile(schema_overlay_filename.c_str())) { + return absl_ports::InternalError( + "Unable to delete stale schema overlay file."); + } + return libtextclassifier3::Status::OK; +} + +/* static */ libtextclassifier3::Status SchemaStore::MigrateSchema( + const Filesystem* filesystem, const std::string& base_dir, + version_util::StateChange version_state_change, int32_t new_version) { + if (!filesystem->DirectoryExists(base_dir.c_str())) { + // Situations when schema store directory doesn't exist: + // - Initializing new Icing instance: don't have to do anything now. The + // directory will be created later. + // - Lose schema store: there is nothing we can do now. The logic will be + // handled later by initializing. + // + // Therefore, just simply return OK here. 
+ return libtextclassifier3::Status::OK; + } + + std::string overlay_schema_filename = MakeOverlaySchemaFilename(base_dir); + if (!filesystem->FileExists(overlay_schema_filename.c_str())) { + // The overlay doesn't exist. So there should be nothing particularly + // interesting to worry about. + return libtextclassifier3::Status::OK; + } + + std::string header_filename = MakeHeaderFilename(base_dir); + libtextclassifier3::StatusOr<Header> header_or; + switch (version_state_change) { + // No necessary actions for normal upgrades or no version change. The data + // that was produced by the previous version is fully compatible with this + // version and there's no stale data for us to clean up. + // The same is true for a normal rollforward. A normal rollforward implies + // that the previous version was one that understood the concept of the + // overlay schema and would have already discarded it if it was unusable. + case version_util::StateChange::kVersionZeroUpgrade: + // fallthrough + case version_util::StateChange::kUpgrade: + // fallthrough + case version_util::StateChange::kRollForward: + // fallthrough + case version_util::StateChange::kCompatible: + return libtextclassifier3::Status::OK; + case version_util::StateChange::kVersionZeroRollForward: + // We've rolled forward. The schema overlay file, if it exists, is + // possibly stale. We must throw it out. + header_or = Header::Read(filesystem, header_filename); + if (!header_or.ok()) { + return header_or.status(); + } + return SchemaStore::DiscardOverlaySchema(filesystem, base_dir, + header_or.ValueOrDie()); + case version_util::StateChange::kRollBack: + header_or = Header::Read(filesystem, header_filename); + if (!header_or.ok()) { + return header_or.status(); + } + if (header_or.ValueOrDie().min_overlay_version_compatibility() <= + new_version) { + // We've been rolled back, but the overlay schema claims that it + // supports this version. So we can safely return. 
+ return libtextclassifier3::Status::OK; + } + // We've been rolled back to a version that the overlay schema doesn't + // support. We must throw it out. + return SchemaStore::DiscardOverlaySchema(filesystem, base_dir, + header_or.ValueOrDie()); + case version_util::StateChange::kUndetermined: + // It's not clear what version we're on, but the base schema should always + // be safe to use. Throw out the overlay. + header_or = Header::Read(filesystem, header_filename); + if (!header_or.ok()) { + return header_or.status(); + } + return SchemaStore::DiscardOverlaySchema(filesystem, base_dir, + header_or.ValueOrDie()); + } + return libtextclassifier3::Status::OK; +} + +/* static */ libtextclassifier3::Status SchemaStore::DiscardDerivedFiles( + const Filesystem* filesystem, const std::string& base_dir) { + // Schema type mapper + return DynamicTrieKeyMapper<SchemaTypeId>::Delete( + *filesystem, MakeSchemaTypeMapperFilename(base_dir)); +} + SchemaStore::SchemaStore(const Filesystem* filesystem, std::string base_dir, const Clock* clock) : filesystem_(filesystem), @@ -158,6 +325,7 @@ SchemaStore::~SchemaStore() { } libtextclassifier3::Status SchemaStore::Initialize(SchemaProto new_schema) { + ICING_RETURN_IF_ERROR(LoadSchema()); if (!absl_ports::IsNotFound(GetSchema().status())) { return absl_ports::FailedPreconditionError( "Incorrectly tried to initialize schema store with a new schema, when " @@ -165,11 +333,13 @@ libtextclassifier3::Status SchemaStore::Initialize(SchemaProto new_schema) { } ICING_RETURN_IF_ERROR(schema_file_->Write( std::make_unique<SchemaProto>(std::move(new_schema)))); - return InitializeInternal(/*initialize_stats=*/nullptr); + return InitializeInternal(/*create_overlay_if_necessary=*/true, + /*initialize_stats=*/nullptr); } libtextclassifier3::Status SchemaStore::Initialize( InitializeStatsProto* initialize_stats) { + ICING_RETURN_IF_ERROR(LoadSchema()); auto schema_proto_or = GetSchema(); if (absl_ports::IsNotFound(schema_proto_or.status())) { // 
Don't have an existing schema proto, that's fine @@ -178,11 +348,69 @@ libtextclassifier3::Status SchemaStore::Initialize( // Real error when trying to read the existing schema return schema_proto_or.status(); } - return InitializeInternal(initialize_stats); + return InitializeInternal(/*create_overlay_if_necessary=*/false, + initialize_stats); +} + +libtextclassifier3::Status SchemaStore::LoadSchema() { + libtextclassifier3::StatusOr<Header> header_or = + Header::Read(filesystem_, MakeHeaderFilename(base_dir_)); + bool header_exists = false; + if (!header_or.ok() && !absl_ports::IsNotFound(header_or.status())) { + return header_or.status(); + } else if (!header_or.ok()) { + header_ = std::make_unique<Header>(); + } else { + header_exists = true; + header_ = std::make_unique<Header>(std::move(header_or).ValueOrDie()); + } + + std::string overlay_schema_filename = MakeOverlaySchemaFilename(base_dir_); + bool overlay_schema_file_exists = + filesystem_->FileExists(overlay_schema_filename.c_str()); + + libtextclassifier3::Status base_schema_state = schema_file_->Read().status(); + if (!base_schema_state.ok() && !absl_ports::IsNotFound(base_schema_state)) { + return base_schema_state; + } + + // There are three valid cases: + // 1. Everything is missing. This is an empty schema store. + if (!base_schema_state.ok() && !overlay_schema_file_exists && + !header_exists) { + return libtextclassifier3::Status::OK; + } + + // 2. There never was a overlay schema. The header exists, the base schema + // exists and the header says the overlay schema shouldn't exist + if (base_schema_state.ok() && !overlay_schema_file_exists && header_exists && + !header_->overlay_created()) { + // Nothing else to do. Just return safely. + return libtextclassifier3::Status::OK; + } + + // 3. There is an overlay schema and a base schema and a header. The header + // says that the overlay schema should exist. 
+ if (base_schema_state.ok() && overlay_schema_file_exists && header_exists && + header_->overlay_created()) { + overlay_schema_file_ = std::make_unique<FileBackedProto<SchemaProto>>( + *filesystem_, MakeOverlaySchemaFilename(base_dir_)); + return libtextclassifier3::Status::OK; + } + + // Something has gone wrong. We've lost part of the schema ground truth. + // Return an error. + bool overlay_created = header_->overlay_created(); + bool base_schema_exists = base_schema_state.ok(); + return absl_ports::InternalError(IcingStringUtil::StringPrintf( + "Unable to properly load schema. Header {exists:%d, overlay_created:%d}, " + "base schema exists: %d, overlay_schema_exists: %d", + header_exists, overlay_created, base_schema_exists, + overlay_schema_file_exists)); } libtextclassifier3::Status SchemaStore::InitializeInternal( - InitializeStatsProto* initialize_stats) { + bool create_overlay_if_necessary, InitializeStatsProto* initialize_stats) { if (!InitializeDerivedFiles().ok()) { ICING_VLOG(3) << "Couldn't find derived files or failed to initialize them, " @@ -192,7 +420,7 @@ libtextclassifier3::Status SchemaStore::InitializeInternal( initialize_stats->set_schema_store_recovery_cause( InitializeStatsProto::IO_ERROR); } - ICING_RETURN_IF_ERROR(RegenerateDerivedFiles()); + ICING_RETURN_IF_ERROR(RegenerateDerivedFiles(create_overlay_if_necessary)); if (initialize_stats != nullptr) { initialize_stats->set_schema_store_recovery_latency_ms( regenerate_timer->GetElapsedMilliseconds()); @@ -208,24 +436,6 @@ libtextclassifier3::Status SchemaStore::InitializeInternal( } libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() { - if (!HeaderExists()) { - // Without a header, we don't know if things are consistent between each - // other so the caller should just regenerate everything from ground truth. 
- return absl_ports::InternalError("SchemaStore header doesn't exist"); - } - - SchemaStore::Header header; - if (!filesystem_->Read(MakeHeaderFilename(base_dir_).c_str(), &header, - sizeof(header))) { - return absl_ports::InternalError( - absl_ports::StrCat("Couldn't read: ", MakeHeaderFilename(base_dir_))); - } - - if (header.magic != SchemaStore::Header::kMagic) { - return absl_ports::InternalError(absl_ports::StrCat( - "Invalid header kMagic for file: ", MakeHeaderFilename(base_dir_))); - } - ICING_ASSIGN_OR_RETURN( schema_type_mapper_, DynamicTrieKeyMapper<SchemaTypeId>::Create( @@ -233,78 +443,105 @@ libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() { kSchemaTypeMapperMaxSize)); ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum()); - if (checksum.Get() != header.checksum) { + if (checksum.Get() != header_->checksum()) { return absl_ports::InternalError( "Combined checksum of SchemaStore was inconsistent"); } - // Update our in-memory data structures - type_config_map_.clear(); - ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema()); - for (const SchemaTypeConfigProto& type_config : schema_proto->types()) { - // Update our type_config_map_ - type_config_map_.emplace(type_config.schema_type(), type_config); - } - ICING_ASSIGN_OR_RETURN( - schema_type_manager_, - SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get())); - + // Propagate a failed cache build so that InitializeInternal falls back to + // regenerating the derived files instead of reporting OK with a stale or + // partially built in-memory cache. + ICING_RETURN_IF_ERROR(BuildInMemoryCache()); return libtextclassifier3::Status::OK; } -libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles() { +libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles( + bool create_overlay_if_necessary) { ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema()); ICING_RETURN_IF_ERROR(ResetSchemaTypeMapper()); - type_config_map_.clear(); for (const SchemaTypeConfigProto& type_config : schema_proto->types()) { - // Update our type_config_map_ - type_config_map_.emplace(type_config.schema_type(), type_config); - // Assign a SchemaTypeId to the type
ICING_RETURN_IF_ERROR(schema_type_mapper_->Put( type_config.schema_type(), schema_type_mapper_->num_keys())); } - - ICING_ASSIGN_OR_RETURN( - schema_type_manager_, - SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get())); + // schema_type_manager_ is dereferenced right below, so a failed cache build + // must abort here rather than be silently ignored. + ICING_RETURN_IF_ERROR(BuildInMemoryCache()); + + if (create_overlay_if_necessary) { + ICING_ASSIGN_OR_RETURN( + BackupSchemaProducer producer, + BackupSchemaProducer::Create(*schema_proto, + schema_type_manager_->section_manager())); + + if (producer.is_backup_necessary()) { + SchemaProto base_schema = std::move(producer).Produce(); + + // The overlay schema should be written to the overlay file location. + overlay_schema_file_ = std::make_unique<FileBackedProto<SchemaProto>>( + *filesystem_, MakeOverlaySchemaFilename(base_dir_)); + // schema_proto points to a const proto, so this is a copy; std::move on + // a const object cannot move anything, so spell the copy out. + auto schema_ptr = std::make_unique<SchemaProto>(*schema_proto); + ICING_RETURN_IF_ERROR(overlay_schema_file_->Write(std::move(schema_ptr))); + + // The base schema should be written to the original file + auto base_schema_ptr = + std::make_unique<SchemaProto>(std::move(base_schema)); + ICING_RETURN_IF_ERROR(schema_file_->Write(std::move(base_schema_ptr))); + + header_->SetOverlayInfo( + /*overlay_created=*/true, + /*min_overlay_version_compatibility=*/version_util::kVersionOne); + // Rebuild in memory data - references to the old schema will be invalid + // now. Do not write the header below if the rebuild fails.
+ ICING_RETURN_IF_ERROR(BuildInMemoryCache()); + } + } // Write the header ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum()); - ICING_RETURN_IF_ERROR(UpdateHeader(checksum)); - - return libtextclassifier3::Status::OK; -} - -bool SchemaStore::HeaderExists() { - if (!filesystem_->FileExists(MakeHeaderFilename(base_dir_).c_str())) { - return false; - } - - int64_t file_size = - filesystem_->GetFileSize(MakeHeaderFilename(base_dir_).c_str()); - - // If it's been truncated to size 0 before, we consider it to be a new file - return file_size != 0 && file_size != Filesystem::kBadFileSize; + header_->set_checksum(checksum.Get()); + return header_->Write(filesystem_, MakeHeaderFilename(base_dir_)); } -libtextclassifier3::Status SchemaStore::UpdateHeader(const Crc32& checksum) { - // Write the header - SchemaStore::Header header; - header.magic = SchemaStore::Header::kMagic; - header.checksum = checksum.Get(); +libtextclassifier3::Status SchemaStore::BuildInMemoryCache() { + ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema()); + ICING_ASSIGN_OR_RETURN( + SchemaUtil::InheritanceMap inheritance_map, + SchemaUtil::BuildTransitiveInheritanceGraph(*schema_proto)); - ScopedFd scoped_fd( - filesystem_->OpenForWrite(MakeHeaderFilename(base_dir_).c_str())); - // This should overwrite the header.
- if (!scoped_fd.is_valid() || - !filesystem_->Write(scoped_fd.get(), &header, sizeof(header)) || - !filesystem_->DataSync(scoped_fd.get())) { - return absl_ports::InternalError(absl_ports::StrCat( - "Failed to write SchemaStore header: ", MakeHeaderFilename(base_dir_))); + reverse_schema_type_mapper_.clear(); + type_config_map_.clear(); + schema_subtype_id_map_.clear(); + for (const SchemaTypeConfigProto& type_config : schema_proto->types()) { + std::string_view type_name = type_config.schema_type(); + ICING_ASSIGN_OR_RETURN(SchemaTypeId type_id, + schema_type_mapper_->Get(type_name)); + + // Build reverse_schema_type_mapper_ + reverse_schema_type_mapper_.insert({type_id, std::string(type_name)}); + + // Build type_config_map_ + type_config_map_.insert({std::string(type_name), type_config}); + + // Build schema_subtype_id_map_ + std::unordered_set<SchemaTypeId>& subtype_id_set = + schema_subtype_id_map_[type_id]; + // Find all child types + auto child_types_names = inheritance_map.find(type_name); + if (child_types_names != inheritance_map.end()) { + subtype_id_set.reserve(child_types_names->second.size() + 1); + for (const auto& [child_type_name, is_direct_child] : + child_types_names->second) { + ICING_ASSIGN_OR_RETURN(SchemaTypeId child_type_id, + schema_type_mapper_->Get(child_type_name)); + subtype_id_set.insert(child_type_id); + } + } + // Every type is a subtype of itself. 
+ subtype_id_set.insert(type_id); } + + // Build schema_type_manager_ + ICING_ASSIGN_OR_RETURN( + schema_type_manager_, + SchemaTypeManager::Create(type_config_map_, schema_type_mapper_.get())); return libtextclassifier3::Status::OK; } @@ -331,7 +568,8 @@ libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() { } libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const { - auto schema_proto_or = GetSchema(); + // Base schema checksum + auto schema_proto_or = schema_file_->Read(); if (absl_ports::IsNotFound(schema_proto_or.status())) { return Crc32(); } @@ -339,11 +577,23 @@ libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const { Crc32 schema_checksum; schema_checksum.Append(schema_proto->SerializeAsString()); + Crc32 overlay_schema_checksum; + if (overlay_schema_file_ != nullptr) { + auto schema_proto_or = schema_file_->Read(); + if (schema_proto_or.ok()) { + ICING_ASSIGN_OR_RETURN(schema_proto, schema_proto_or); + overlay_schema_checksum.Append(schema_proto->SerializeAsString()); + } + } + ICING_ASSIGN_OR_RETURN(Crc32 schema_type_mapper_checksum, schema_type_mapper_->ComputeChecksum()); Crc32 total_checksum; total_checksum.Append(std::to_string(schema_checksum.Get())); + if (overlay_schema_file_ != nullptr) { + total_checksum.Append(std::to_string(overlay_schema_checksum.Get())); + } total_checksum.Append(std::to_string(schema_type_mapper_checksum.Get())); return total_checksum; @@ -351,6 +601,9 @@ libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const { libtextclassifier3::StatusOr<const SchemaProto*> SchemaStore::GetSchema() const { + if (overlay_schema_file_ != nullptr) { + return overlay_schema_file_->Read(); + } return schema_file_->Read(); } @@ -360,15 +613,19 @@ libtextclassifier3::StatusOr<const SchemaProto*> SchemaStore::GetSchema() // SetSchema(SchemaProto&& new_schema) libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult> SchemaStore::SetSchema(const SchemaProto& new_schema, - bool 
ignore_errors_and_delete_documents) { - return SetSchema(SchemaProto(new_schema), ignore_errors_and_delete_documents); + bool ignore_errors_and_delete_documents, + bool allow_circular_schema_definitions) { + return SetSchema(SchemaProto(new_schema), ignore_errors_and_delete_documents, + allow_circular_schema_definitions); } libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult> SchemaStore::SetSchema(SchemaProto&& new_schema, - bool ignore_errors_and_delete_documents) { - ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap new_dependent_map, - SchemaUtil::Validate(new_schema)); + bool ignore_errors_and_delete_documents, + bool allow_circular_schema_definitions) { + ICING_ASSIGN_OR_RETURN( + SchemaUtil::DependentMap new_dependent_map, + SchemaUtil::Validate(new_schema, allow_circular_schema_definitions)); SetSchemaResult result; @@ -493,6 +750,10 @@ libtextclassifier3::Status SchemaStore::ApplySchemaChange( // Manually set them to the correct paths. base_dir_ = std::move(old_base_dir); schema_file_->SetSwappedFilepath(MakeSchemaFilename(base_dir_)); + if (overlay_schema_file_ != nullptr) { + overlay_schema_file_->SetSwappedFilepath( + MakeOverlaySchemaFilename(base_dir_)); + } return libtextclassifier3::Status::OK; } @@ -515,6 +776,19 @@ libtextclassifier3::StatusOr<SchemaTypeId> SchemaStore::GetSchemaTypeId( return schema_type_mapper_->Get(schema_type); } +libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*> +SchemaStore::GetSchemaTypeIdsWithChildren(std::string_view schema_type) const { + ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id, + GetSchemaTypeId(schema_type)); + auto iter = schema_subtype_id_map_.find(schema_type_id); + if (iter == schema_subtype_id_map_.end()) { + // This should never happen, unless there is an inconsistency or IO error. 
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Schema type '", schema_type, "' is not found in the subtype map."));
+  }
+  return &iter->second;
+}
+
 libtextclassifier3::StatusOr<const SectionMetadata*>
 SchemaStore::GetSectionMetadata(SchemaTypeId schema_type_id,
                                 SectionId section_id) const {
@@ -551,9 +825,8 @@ libtextclassifier3::Status SchemaStore::PersistToDisk() {
   ICING_RETURN_IF_ERROR(schema_type_mapper_->PersistToDisk());
   // Write the header
   ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
-  ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
-
-  return libtextclassifier3::Status::OK;
+  header_->set_checksum(checksum.Get());
+  return header_->Write(filesystem_, MakeHeaderFilename(base_dir_));
 }
 
 SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const {
@@ -589,6 +862,56 @@ SchemaStore::GetSectionMetadata(const std::string& schema_type) const {
   return schema_type_manager_->section_manager().GetMetadataList(schema_type);
 }
 
+// Returns true if property_path names a property that is defined for the
+// schema type schema_type_id. Every part of the path except the last one must
+// be a DOCUMENT property; its schema type is used to resolve the next part.
+// Returns false if the type id, a type config or a property cannot be found.
+bool SchemaStore::IsPropertyDefinedInSchema(
+    SchemaTypeId schema_type_id, const std::string& property_path) const {
+  auto schema_name_itr = reverse_schema_type_mapper_.find(schema_type_id);
+  if (schema_name_itr == reverse_schema_type_mapper_.end()) {
+    return false;
+  }
+  const std::string* current_type_name = &schema_name_itr->second;
+
+  const std::vector<std::string_view> property_path_parts =
+      property_util::SplitPropertyPathExpr(property_path);
+  // size_t matches the type of size(), avoiding a signed/unsigned comparison.
+  for (size_t i = 0; i < property_path_parts.size(); ++i) {
+    auto type_config_itr = type_config_map_.find(*current_type_name);
+    if (type_config_itr == type_config_map_.end()) {
+      return false;
+    }
+    const std::string_view property_name = property_path_parts[i];
+    const PropertyConfigProto* selected_property = nullptr;
+    for (const PropertyConfigProto& property :
+         type_config_itr->second.properties()) {
+      if (property.property_name() == property_name) {
+        selected_property = &property;
+        break;
+      }
+    }
+    if (selected_property == nullptr) {
+      return false;
+    }
+    if (i + 1 == property_path_parts.size()) {
+      // We've found a property at the final part of the path.
+      return true;
+    }
+    if (selected_property->data_type() !=
+        PropertyConfigProto::DataType::DOCUMENT) {
+      // This isn't the final part of the path and the property isn't a
+      // document, so we know that this path doesn't exist.
+      return false;
+    }
+    current_type_name = &selected_property->schema_type();
+  }
+
+  // Only reached when the path has no parts: each loop iteration either
+  // returns or advances to a later part, and the last part always returns.
+  return false;
+}
+
 libtextclassifier3::StatusOr<SchemaDebugInfoProto> SchemaStore::GetDebugInfo()
     const {
   SchemaDebugInfoProto debug_info;
@@ -601,5 +924,67 @@ libtextclassifier3::StatusOr<SchemaDebugInfoProto> SchemaStore::GetDebugInfo()
   return debug_info;
 }
 
+// Expands type_property_masks so that the paths of a mask also apply to every
+// schema type id that GetSchemaTypeIdsWithChildren() returns for the mask's
+// schema type. Paths that end up on the same type are merged into one entry.
+// Wildcard masks are collected in a single entry; masks whose schema type
+// cannot be resolved are dropped.
+std::vector<SchemaStore::ExpandedTypePropertyMask>
+SchemaStore::ExpandTypePropertyMasks(
+    const google::protobuf::RepeatedPtrField<TypePropertyMask>& type_property_masks)
+    const {
+  std::unordered_map<SchemaTypeId, ExpandedTypePropertyMask> result_map;
+  // Merges the paths of mask into the entry stored under type_id, creating
+  // that entry with the name type_name if it doesn't exist yet.
+  auto merge_paths = [&result_map](SchemaTypeId type_id,
+                                   const std::string& type_name,
+                                   const TypePropertyMask& mask) {
+    auto iter = result_map.find(type_id);
+    if (iter == result_map.end()) {
+      ExpandedTypePropertyMask entry{type_name, /*paths=*/{}};
+      iter = result_map.insert({type_id, std::move(entry)}).first;
+    }
+    iter->second.paths.insert(mask.paths().begin(), mask.paths().end());
+  };
+
+  for (const TypePropertyMask& type_field_mask : type_property_masks) {
+    if (type_field_mask.schema_type() == kSchemaTypeWildcard) {
+      // All wildcard masks share the kInvalidSchemaTypeId entry, so the paths
+      // of a repeated wildcard mask are merged rather than silently dropped.
+      merge_paths(kInvalidSchemaTypeId, type_field_mask.schema_type(),
+                  type_field_mask);
+      continue;
+    }
+
+    auto schema_type_ids_or =
+        GetSchemaTypeIdsWithChildren(type_field_mask.schema_type());
+    // If we can't find the SchemaTypeIds, just throw it away
+    if (!schema_type_ids_or.ok()) {
+      continue;
+    }
+    const std::unordered_set<SchemaTypeId>* schema_type_ids =
+        schema_type_ids_or.ValueOrDie();
+    for (SchemaTypeId schema_type_id : *schema_type_ids) {
+      auto schema_type_name_iter =
+          reverse_schema_type_mapper_.find(schema_type_id);
+      if (schema_type_name_iter == reverse_schema_type_mapper_.end()) {
+        // This should never happen, unless there is an inconsistency or IO
+        // error.
+        ICING_LOG(ERROR) << "Got unknown schema type id: " << schema_type_id;
+        continue;
+      }
+      merge_paths(schema_type_id, schema_type_name_iter->second,
+                  type_field_mask);
+    }
+  }
+
+  std::vector<ExpandedTypePropertyMask> result;
+  result.reserve(result_map.size());
+  for (auto& entry : result_map) {
+    result.push_back(std::move(entry.second));
+  }
+  return result;
+}
+
 }  // namespace lib
 }  // namespace icing
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index 5ad714e..73d7848 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -16,20 +16,26 @@
 #define ICING_SCHEMA_SCHEMA_STORE_H_
 
 #include <cstdint>
+#include <cstring>
+#include <limits>
 #include <memory>
 #include <string>
 #include <string_view>
+#include <unordered_map>
 #include <unordered_set>
 #include <vector>
 
 #include "icing/text_classifier/lib3/utils/base/status.h"
 #include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
 #include "icing/file/file-backed-proto.h"
 #include "icing/file/filesystem.h"
+#include "icing/file/version-util.h"
 #include "icing/proto/debug.pb.h"
 #include "icing/proto/document.pb.h"
 #include "icing/proto/logging.pb.h"
 #include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
 #include "icing/proto/storage.pb.h"
 #include "icing/schema/joinable-property.h"
 #include "icing/schema/schema-type-manager.h"
@@ -50,9 +56,7 @@ namespace lib {
 // should always call Get* from the SchemaStore.
 class SchemaStore {
  public:
-  struct Header {
-    static constexpr int32_t kMagic = 0x72650d0a;
-
+  struct LegacyHeader {
     // Holds the magic as a quick sanity check against file corruption.
int32_t magic;
 
@@ -60,6 +64,77 @@ class SchemaStore {
     uint32_t checksum;
   };
 
+  // Header of the SchemaStore. Holds a magic number, the checksum of the
+  // SchemaStore and the state of the overlay schema. Its size is pinned to
+  // 1024 bytes by the static_assert below; the bytes that are not used by a
+  // field are kept zeroed so that they can be given a meaning later.
+  class Header {
+   public:
+    static constexpr int32_t kMagic = 0x72650d0a;
+
+    explicit Header()
+        : magic_(kMagic),
+          checksum_(0),
+          overlay_created_(false),
+          min_overlay_version_compatibility_(
+              std::numeric_limits<int32_t>::max()) {
+      memset(overlay_created_padding_, 0, kOverlayCreatedPaddingSize);
+      memset(padding, 0, kPaddingSize);
+    }
+
+    // RETURNS:
+    //   - On success, a valid Header instance
+    //   - NOT_FOUND if header file doesn't exist
+    //   - INTERNAL if unable to read header
+    static libtextclassifier3::StatusOr<Header> Read(
+        const Filesystem* filesystem, const std::string& path);
+
+    libtextclassifier3::Status Write(const Filesystem* filesystem,
+                                     const std::string& path);
+
+    int32_t magic() const { return magic_; }
+
+    uint32_t checksum() const { return checksum_; }
+    void set_checksum(uint32_t checksum) { checksum_ = checksum; }
+
+    bool overlay_created() const { return overlay_created_; }
+
+    int32_t min_overlay_version_compatibility() const {
+      return min_overlay_version_compatibility_;
+    }
+
+    void SetOverlayInfo(bool overlay_created,
+                        int32_t min_overlay_version_compatibility) {
+      overlay_created_ = overlay_created;
+      min_overlay_version_compatibility_ = min_overlay_version_compatibility;
+    }
+
+   private:
+    // Holds the magic as a quick sanity check against file corruption.
+    int32_t magic_;
+
+    // Checksum of the SchemaStore's sub-component's checksums.
+    uint32_t checksum_;
+
+    bool overlay_created_;
+
+    // Spells out the bytes that alignment would otherwise insert in front of
+    // min_overlay_version_compatibility_, so that they are zeroed like the
+    // rest of the header instead of being left indeterminate.
+    // NOTE(review): presumably Read()/Write() copy the raw object bytes -
+    // confirm against their definitions.
+    static constexpr int kOverlayCreatedPaddingSize =
+        sizeof(int32_t) - sizeof(bool);
+    uint8_t overlay_created_padding_[kOverlayCreatedPaddingSize];
+
+    int32_t min_overlay_version_compatibility_;
+
+    static constexpr int kPaddingSize = 1008;
+    // Padding exists just to reserve space for additional values.
+    uint8_t padding[kPaddingSize];
+  };
+  static_assert(sizeof(Header) == 1024);
+
   // Holds information on what may have been affected by the new schema. This is
   // generally data that other classes may depend on from the SchemaStore,
   // so that we can know if we should go update those classes as well.
@@ -121,6 +182,13 @@ class SchemaStore { std::unordered_set<std::string> schema_types_join_incompatible_by_name; }; + struct ExpandedTypePropertyMask { + std::string schema_type; + std::unordered_set<std::string> paths; + }; + + static constexpr std::string_view kSchemaTypeWildcard = "*"; + // Factory function to create a SchemaStore which does not take ownership // of any input components, and all pointers must refer to valid objects that // outlive the created SchemaStore instance. The base_dir must already exist. @@ -137,6 +205,23 @@ class SchemaStore { const Filesystem* filesystem, const std::string& base_dir, const Clock* clock, InitializeStatsProto* initialize_stats = nullptr); + // Migrates schema files (backup v.s. new schema) according to version state + // change. + // + // Returns: + // OK on success or nothing to migrate + static libtextclassifier3::Status MigrateSchema( + const Filesystem* filesystem, const std::string& base_dir, + version_util::StateChange version_state_change, int32_t new_version); + + // Discards all derived data in the schema store. + // + // Returns: + // OK on success or nothing to discard + // INTERNAL_ERROR on any I/O errors + static libtextclassifier3::Status DiscardDerivedFiles( + const Filesystem* filesystem, const std::string& base_dir); + SchemaStore(SchemaStore&&) = default; SchemaStore& operator=(SchemaStore&&) = default; @@ -168,10 +253,12 @@ class SchemaStore { // INTERNAL_ERROR on any IO errors libtextclassifier3::StatusOr<const SetSchemaResult> SetSchema( const SchemaProto& new_schema, - bool ignore_errors_and_delete_documents = false); + bool ignore_errors_and_delete_documents, + bool allow_circular_schema_definitions); libtextclassifier3::StatusOr<const SetSchemaResult> SetSchema( SchemaProto&& new_schema, - bool ignore_errors_and_delete_documents = false); + bool ignore_errors_and_delete_documents, + bool allow_circular_schema_definitions); // Get the SchemaTypeConfigProto of schema_type name. 
// @@ -193,6 +280,17 @@ class SchemaStore { libtextclassifier3::StatusOr<SchemaTypeId> GetSchemaTypeId( std::string_view schema_type) const; + // Similar to GetSchemaTypeId but will return a set of SchemaTypeId to also + // include child types. + // + // Returns: + // A set of SchemaTypeId on success + // FAILED_PRECONDITION if schema hasn't been set yet + // NOT_FOUND_ERROR if we don't know about the schema type + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*> + GetSchemaTypeIdsWithChildren(std::string_view schema_type) const; + // Returns the SectionMetadata associated with the SectionId that's in the // SchemaTypeId. // @@ -203,6 +301,11 @@ class SchemaStore { libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata( SchemaTypeId schema_type_id, SectionId section_id) const; + // Returns true if a property is defined in the said schema, regardless of + // whether it is indexed or not. + bool IsPropertyDefinedInSchema(SchemaTypeId schema_type_id, + const std::string& property) const; + // Extracts all sections of different types from the given document and group // them by type. // - Each Section vector is sorted by section Id in ascending order. The @@ -282,6 +385,23 @@ class SchemaStore { // INTERNAL_ERROR on IO errors, crc compute error libtextclassifier3::StatusOr<SchemaDebugInfoProto> GetDebugInfo() const; + // Expands the provided type_property_masks into a vector of + // ExpandedTypePropertyMasks to account for polymorphism. If both a parent + // type and one of its child type appears in the masks, the parent type's + // paths will be merged into the child's. + // + // For example, assume that we have two schema types A and B, and we have + // - A is the parent type of B + // - Paths of A: {P1, P2} + // - Paths of B: {P3} + // + // Then, we will have the following in the result. 
+ // - Expanded paths of A: {P1, P2} + // - Expanded paths of B: {P1, P2, P3} + std::vector<ExpandedTypePropertyMask> ExpandTypePropertyMasks( + const google::protobuf::RepeatedPtrField<TypePropertyMask>& type_property_masks) + const; + private: // Factory function to create a SchemaStore and set its schema. The created // instance does not take ownership of any input components and all pointers @@ -302,6 +422,15 @@ class SchemaStore { explicit SchemaStore(const Filesystem* filesystem, std::string base_dir, const Clock* clock); + // Deletes the overlay schema and ensures that the Header is correctly set. + // + // RETURNS: + // OK on success + // INTERNAL_ERROR on any IO errors + static libtextclassifier3::Status DiscardOverlaySchema( + const Filesystem* filesystem, const std::string& base_dir, + Header& header); + // Verifies that there is no error retrieving a previously set schema. Then // initializes like normal. // @@ -325,7 +454,7 @@ class SchemaStore { // OK on success // INTERNAL_ERROR on IO error libtextclassifier3::Status InitializeInternal( - InitializeStatsProto* initialize_stats); + bool create_overlay_if_necessary, InitializeStatsProto* initialize_stats); // Creates sub-components and verifies the integrity of each sub-component. // @@ -340,11 +469,16 @@ class SchemaStore { // OK on success // NOT_FOUND_ERROR if a schema proto has not been set // INTERNAL_ERROR on any IO errors - libtextclassifier3::Status RegenerateDerivedFiles(); + libtextclassifier3::Status RegenerateDerivedFiles( + bool create_overlay_if_necessary); - // Checks if the header exists already. This does not create the header file - // if it doesn't exist. - bool HeaderExists(); + // Build type_config_map_, schema_subtype_id_map_, and schema_type_manager_. + // + // Returns: + // OK on success + // NOT_FOUND_ERROR if a schema proto has not been set + // INTERNAL_ERROR on any IO errors + libtextclassifier3::Status BuildInMemoryCache(); // Update and replace the header file. 
Creates the header file if it doesn't // exist. @@ -377,6 +511,15 @@ class SchemaStore { : absl_ports::FailedPreconditionError("Schema not set yet."); } + // Correctly loads the Header, schema_file_ and (if present) the + // overlay_schema_file_. + // RETURNS: + // - OK on success + // - INTERNAL if an IO error is encountered when reading the Header or + // schemas. + // Or an invalid schema configuration is present. + libtextclassifier3::Status LoadSchema(); + const Filesystem* filesystem_; std::string base_dir_; const Clock* clock_; @@ -389,17 +532,37 @@ class SchemaStore { // Cached schema std::unique_ptr<FileBackedProto<SchemaProto>> schema_file_; + // This schema holds the definition of any schema types that are not + // compatible with older versions of Icing code. + std::unique_ptr<FileBackedProto<SchemaProto>> overlay_schema_file_; + + // Maps schema types to a densely-assigned unique id. + std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_; + + // Maps schema type ids to the corresponding schema type. This is an inverse + // map of schema_type_mapper_. + std::unordered_map<SchemaTypeId, std::string> reverse_schema_type_mapper_; + // A hash map of (type config name -> type config), allows faster lookup of // type config in schema. The O(1) type config access makes schema-related and // section-related operations faster. SchemaUtil::TypeConfigMap type_config_map_; - // Maps schema types to a densely-assigned unique id. - std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_; + // Maps from each type id to all of its subtype ids. + // T2 is a subtype of T1, if and only if one of the following conditions is + // met: + // - T2 is T1 + // - T2 extends T1 + // - There exists a type U, such that T2 is a subtype of U, and U is a subtype + // of T1 + std::unordered_map<SchemaTypeId, std::unordered_set<SchemaTypeId>> + schema_subtype_id_map_; // Manager of section (indexable property) and joinable property related // metadata for all Schemas. 
std::unique_ptr<const SchemaTypeManager> schema_type_manager_; + + std::unique_ptr<Header> header_; }; } // namespace lib diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc index 4e2724f..3298b75 100644 --- a/icing/schema/schema-store_test.cc +++ b/icing/schema/schema-store_test.cc @@ -25,9 +25,11 @@ #include "icing/document-builder.h" #include "icing/file/filesystem.h" #include "icing/file/mock-filesystem.h" +#include "icing/file/version-util.h" #include "icing/portable/equals-proto.h" #include "icing/proto/debug.pb.h" #include "icing/proto/document.pb.h" +#include "icing/proto/logging.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" @@ -56,6 +58,7 @@ using ::testing::Not; using ::testing::Pointee; using ::testing::Return; using ::testing::SizeIs; +using ::testing::UnorderedElementsAre; constexpr int64_t kDefaultTimestamp = 12345678; @@ -114,7 +117,7 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveConstructible) { // Create an instance of SchemaStore. 
SchemaProto schema = SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty( + .AddType(SchemaTypeConfigBuilder().SetType("type_a").AddProperty( PropertyConfigBuilder() .SetName("prop1") .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) @@ -125,7 +128,9 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveConstructible) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - ICING_ASSERT_OK(schema_store->SetSchema(schema)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(Crc32 expected_checksum, schema_store->ComputeChecksum()); @@ -138,7 +143,7 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveConstructible) { SectionMetadata expected_metadata(/*id_in=*/0, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_EXACT, NUMERIC_MATCH_UNKNOWN, "prop1"); - EXPECT_THAT(move_constructed_schema_store.GetSectionMetadata("TypeA"), + EXPECT_THAT(move_constructed_schema_store.GetSectionMetadata("type_a"), IsOkAndHolds(Pointee(ElementsAre(expected_metadata)))); } @@ -146,7 +151,7 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) { // Create an instance of SchemaStore. 
SchemaProto schema1 = SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("TypeA").AddProperty( + .AddType(SchemaTypeConfigBuilder().SetType("type_a").AddProperty( PropertyConfigBuilder() .SetName("prop1") .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) @@ -157,14 +162,16 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - ICING_ASSERT_OK(schema_store->SetSchema(schema1)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema1, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(Crc32 expected_checksum, schema_store->ComputeChecksum()); // Construct another instance of SchemaStore SchemaProto schema2 = SchemaBuilder() - .AddType(SchemaTypeConfigBuilder().SetType("TypeB").AddProperty( + .AddType(SchemaTypeConfigBuilder().SetType("type_b").AddProperty( PropertyConfigBuilder() .SetName("prop2") .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) @@ -174,7 +181,9 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> move_assigned_schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - ICING_ASSERT_OK(schema_store->SetSchema(schema2)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema2, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // Move assign the first instance into the second one. 
*move_assigned_schema_store = std::move(*schema_store); @@ -185,7 +194,7 @@ TEST_F(SchemaStoreTest, SchemaStoreMoveAssignment) { SectionMetadata expected_metadata(/*id_in=*/0, TYPE_STRING, TOKENIZER_PLAIN, TERM_MATCH_EXACT, NUMERIC_MATCH_UNKNOWN, "prop1"); - EXPECT_THAT(move_assigned_schema_store->GetSectionMetadata("TypeA"), + EXPECT_THAT(move_assigned_schema_store->GetSectionMetadata("type_a"), IsOkAndHolds(Pointee(ElementsAre(expected_metadata)))); } @@ -199,7 +208,9 @@ TEST_F(SchemaStoreTest, CorruptSchemaError) { SchemaStore::SetSchemaResult result; result.success = true; result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); - EXPECT_THAT(schema_store->SetSchema(schema_), + EXPECT_THAT(schema_store->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -237,7 +248,9 @@ TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) { SchemaStore::SetSchemaResult result; result.success = true; result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); - EXPECT_THAT(schema_store->SetSchema(schema_), + EXPECT_THAT(schema_store->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -254,9 +267,56 @@ TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) { absl_ports::StrCat(schema_store_dir_, "/schema_type_mapper"); filesystem_.DeleteDirectoryRecursively(schema_type_mapper_dir.c_str()); + InitializeStatsProto initialize_stats; + fake_clock_.SetTimerElapsedMilliseconds(123); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, - SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaStore::Create(&filesystem_, 
schema_store_dir_, &fake_clock_, + &initialize_stats)); + EXPECT_THAT(initialize_stats.schema_store_recovery_cause(), + Eq(InitializeStatsProto::IO_ERROR)); + EXPECT_THAT(initialize_stats.schema_store_recovery_latency_ms(), Eq(123)); + + // Everything looks fine, ground truth and derived data + ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, + schema_store->GetSchema()); + EXPECT_THAT(*actual_schema, EqualsProto(schema_)); + EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0)); +} + +TEST_F(SchemaStoreTest, RecoverDiscardDerivedFilesOk) { + { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + + // Set it for the first time + SchemaStore::SetSchemaResult result; + result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); + EXPECT_THAT(schema_store->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOkAndHolds(EqualsSetSchemaResult(result))); + ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, + schema_store->GetSchema()); + EXPECT_THAT(*actual_schema, EqualsProto(schema_)); + + EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0)); + } + + ICING_ASSERT_OK( + SchemaStore::DiscardDerivedFiles(&filesystem_, schema_store_dir_)); + + InitializeStatsProto initialize_stats; + fake_clock_.SetTimerElapsedMilliseconds(123); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_, + &initialize_stats)); + EXPECT_THAT(initialize_stats.schema_store_recovery_cause(), + Eq(InitializeStatsProto::IO_ERROR)); + EXPECT_THAT(initialize_stats.schema_store_recovery_latency_ms(), Eq(123)); // Everything looks fine, ground truth and derived data ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, @@ -275,7 +335,9 @@ TEST_F(SchemaStoreTest, 
RecoverBadChecksumOk) { SchemaStore::SetSchemaResult result; result.success = true; result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); - EXPECT_THAT(schema_store->SetSchema(schema_), + EXPECT_THAT(schema_store->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -289,7 +351,7 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) { // of derived files from ground truth. const std::string header_file = absl_ports::StrCat(schema_store_dir_, "/schema_store_header"); - SchemaStore::Header header; + SchemaStore::LegacyHeader header; header.magic = SchemaStore::Header::kMagic; header.checksum = 10; // Arbitrary garbage checksum filesystem_.DeleteFile(header_file.c_str()); @@ -348,7 +410,9 @@ TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) { SchemaStore::SetSchemaResult result; result.success = true; result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); - EXPECT_THAT(schema_store->SetSchema(schema_), + EXPECT_THAT(schema_store->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); schema_store.reset(); @@ -374,7 +438,9 @@ TEST_F(SchemaStoreTest, MultipleCreateOk) { SchemaStore::SetSchemaResult result; result.success = true; result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); - EXPECT_THAT(schema_store->SetSchema(schema_), + EXPECT_THAT(schema_store->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); // Verify that our in-memory structures are ok @@ -419,7 +485,9 @@ TEST_F(SchemaStoreTest, SetNewSchemaOk) { SchemaStore::SetSchemaResult result; result.success = true; 
result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); - EXPECT_THAT(schema_store->SetSchema(schema_), + EXPECT_THAT(schema_store->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -435,7 +503,9 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) { SchemaStore::SetSchemaResult result; result.success = true; result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); - EXPECT_THAT(schema_store->SetSchema(schema_), + EXPECT_THAT(schema_store->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -444,7 +514,9 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) { // And one more for fun result = SchemaStore::SetSchemaResult(); result.success = true; - EXPECT_THAT(schema_store->SetSchema(schema_), + EXPECT_THAT(schema_store->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); EXPECT_THAT(*actual_schema, EqualsProto(schema_)); @@ -459,7 +531,9 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) { SchemaStore::SetSchemaResult result; result.success = true; result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); - EXPECT_THAT(schema_store->SetSchema(schema_), + EXPECT_THAT(schema_store->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -473,7 +547,9 @@ 
TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) { result.success = false; result.schema_types_deleted_by_name.emplace("email"); result.schema_types_deleted_by_id.emplace(0); - EXPECT_THAT(schema_store->SetSchema(schema_), + EXPECT_THAT(schema_store->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); } @@ -490,7 +566,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) { SchemaStore::SetSchemaResult result; result.success = true; result.schema_types_new_by_name.insert("email"); - EXPECT_THAT(schema_store->SetSchema(schema), + EXPECT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -505,7 +583,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) { result = SchemaStore::SetSchemaResult(); result.success = true; result.schema_types_new_by_name.insert("new_type"); - EXPECT_THAT(schema_store->SetSchema(schema), + EXPECT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); EXPECT_THAT(*actual_schema, EqualsProto(schema)); @@ -527,7 +607,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) { result.success = true; result.schema_types_new_by_name.insert("email"); result.schema_types_new_by_name.insert("message"); - EXPECT_THAT(schema_store->SetSchema(schema), + EXPECT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -552,7 
+634,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) { old_email_schema_type_id); // Can't set the incompatible schema - EXPECT_THAT(schema_store->SetSchema(schema), + EXPECT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(incompatible_result))); SchemaStore::SetSchemaResult force_result; @@ -563,7 +647,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) { // Force set the incompatible schema EXPECT_THAT(schema_store->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true), + schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(force_result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); EXPECT_THAT(*actual_schema, EqualsProto(schema)); @@ -585,7 +670,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) { result.success = true; result.schema_types_new_by_name.insert("email"); result.schema_types_new_by_name.insert("message"); - EXPECT_THAT(schema_store->SetSchema(schema), + EXPECT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -606,7 +693,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) { 1); // Old SchemaTypeId of "message" // Set the compatible schema - EXPECT_THAT(schema_store->SetSchema(schema), + EXPECT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); EXPECT_THAT(*actual_schema, EqualsProto(schema)); @@ -631,7 +720,9 @@ TEST_F(SchemaStoreTest, 
IndexedPropertyChangeRequiresReindexingOk) { SchemaStore::SetSchemaResult result; result.success = true; result.schema_types_new_by_name.insert("email"); - EXPECT_THAT(schema_store->SetSchema(schema), + EXPECT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -650,7 +741,9 @@ TEST_F(SchemaStoreTest, IndexedPropertyChangeRequiresReindexingOk) { result = SchemaStore::SetSchemaResult(); result.success = true; result.schema_types_index_incompatible_by_name.insert("email"); - EXPECT_THAT(schema_store->SetSchema(schema), + EXPECT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); EXPECT_THAT(*actual_schema, EqualsProto(schema)); @@ -698,8 +791,11 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) { result.success = true; result.schema_types_new_by_name.insert("email"); result.schema_types_new_by_name.insert("person"); - EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema), - IsOkAndHolds(EqualsSetSchemaResult(result))); + EXPECT_THAT( + schema_store->SetSchema(no_nested_index_schema, + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema)); @@ -709,8 +805,11 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) { result = SchemaStore::SetSchemaResult(); result.success = true; result.schema_types_index_incompatible_by_name.insert("person"); - 
EXPECT_THAT(schema_store->SetSchema(nested_index_schema), - IsOkAndHolds(EqualsSetSchemaResult(result))); + EXPECT_THAT( + schema_store->SetSchema(nested_index_schema, + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); EXPECT_THAT(*actual_schema, EqualsProto(nested_index_schema)); @@ -719,8 +818,11 @@ TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) { result = SchemaStore::SetSchemaResult(); result.success = true; result.schema_types_index_incompatible_by_name.insert("person"); - EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema), - IsOkAndHolds(EqualsSetSchemaResult(result))); + EXPECT_THAT( + schema_store->SetSchema(no_nested_index_schema, + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema)); } @@ -744,7 +846,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) { SchemaStore::SetSchemaResult result; result.success = true; result.schema_types_new_by_name.insert("email"); - EXPECT_THAT(schema_store->SetSchema(schema), + EXPECT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -770,7 +874,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) { old_email_schema_type_id); // Can't set the incompatible schema - EXPECT_THAT(schema_store->SetSchema(schema), + EXPECT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), 
IsOkAndHolds(EqualsSetSchemaResult(incompatible_result))); SchemaStore::SetSchemaResult force_result; @@ -781,7 +887,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) { // Force set the incompatible schema EXPECT_THAT(schema_store->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true), + schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(force_result))); ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); EXPECT_THAT(*actual_schema, EqualsProto(schema)); @@ -803,7 +910,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleNestedTypesOk) { .SetCardinality(CARDINALITY_REPEATED)); SchemaProto old_schema = SchemaBuilder().AddType(contact_point_repeated_label).Build(); - ICING_EXPECT_OK(schema_store->SetSchema(old_schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + old_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_contact_point_type_id, schema_store->GetSchemaTypeId("ContactPoint")); @@ -839,7 +948,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleNestedTypesOk) { expected_result.schema_types_new_by_name.insert("Person"); EXPECT_THAT( schema_store->SetSchema(new_schema, - /*ignore_errors_and_delete_documents=*/false), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(expected_result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -850,7 +960,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleNestedTypesOk) { expected_result.success = true; EXPECT_THAT( schema_store->SetSchema(new_schema, - /*ignore_errors_and_delete_documents=*/true), + /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(expected_result))); 
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); EXPECT_THAT(*actual_schema, EqualsProto(new_schema)); @@ -873,7 +984,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithIndexIncompatibleNestedTypesOk) { .SetCardinality(CARDINALITY_REPEATED)); SchemaProto old_schema = SchemaBuilder().AddType(contact_point_prefix_label).Build(); - ICING_EXPECT_OK(schema_store->SetSchema(old_schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + old_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // 2. Create a type that references the ContactPoint type and make a index // backwards incompatible change to ContactPoint @@ -905,7 +1018,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithIndexIncompatibleNestedTypesOk) { expected_result.schema_types_new_by_name.insert("Person"); EXPECT_THAT( schema_store->SetSchema(new_schema, - /*ignore_errors_and_delete_documents=*/false), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(expected_result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -928,7 +1042,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithCompatibleNestedTypesOk) { .SetCardinality(CARDINALITY_OPTIONAL)); SchemaProto old_schema = SchemaBuilder().AddType(contact_point_optional_label).Build(); - ICING_EXPECT_OK(schema_store->SetSchema(old_schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + old_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // 2. 
Create a type that references the ContactPoint type and make a backwards // compatible change to ContactPoint @@ -960,7 +1076,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithCompatibleNestedTypesOk) { "ContactPoint"); expected_result.schema_types_new_by_name.insert("Person"); EXPECT_THAT(schema_store->SetSchema( - new_schema, /*ignore_errors_and_delete_documents=*/false), + new_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(expected_result))); ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, schema_store->GetSchema()); @@ -988,7 +1105,9 @@ TEST_F(SchemaStoreTest, GetSchemaTypeId) { result.success = true; result.schema_types_new_by_name.insert(first_type); result.schema_types_new_by_name.insert(second_type); - EXPECT_THAT(schema_store->SetSchema(schema_), + EXPECT_THAT(schema_store->SetSchema( + schema_, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); EXPECT_THAT(schema_store->GetSchemaTypeId(first_type), IsOkAndHolds(0)); @@ -1012,7 +1131,9 @@ TEST_F(SchemaStoreTest, ComputeChecksumSameBetweenCalls) { SchemaProto foo_schema = SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build(); - ICING_EXPECT_OK(schema_store->SetSchema(foo_schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + foo_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum()); @@ -1028,7 +1149,9 @@ TEST_F(SchemaStoreTest, ComputeChecksumSameAcrossInstances) { SchemaProto foo_schema = SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build(); - ICING_EXPECT_OK(schema_store->SetSchema(foo_schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + foo_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); 
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum()); @@ -1049,7 +1172,9 @@ TEST_F(SchemaStoreTest, ComputeChecksumChangesOnModification) { SchemaProto foo_schema = SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build(); - ICING_EXPECT_OK(schema_store->SetSchema(foo_schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + foo_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum()); @@ -1060,7 +1185,9 @@ TEST_F(SchemaStoreTest, ComputeChecksumChangesOnModification) { .AddType(SchemaTypeConfigBuilder().SetType("bar")) .Build(); - ICING_EXPECT_OK(schema_store->SetSchema(foo_bar_schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + foo_bar_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(Not(Eq(checksum)))); } @@ -1082,7 +1209,9 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) { SchemaProto schema = SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build(); - ICING_EXPECT_OK(schema_store->SetSchema(schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // Persisting shouldn't change anything ICING_EXPECT_OK(schema_store->PersistToDisk()); @@ -1095,7 +1224,9 @@ TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) { schema = SchemaBuilder(schema) .AddType(SchemaTypeConfigBuilder().SetType("bar")) .Build(); - ICING_EXPECT_OK(schema_store->SetSchema(schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // Should also persist on destruction schema_store.reset(); @@ -1138,7 +1269,9 @@ TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) { result.success = 
true; result.schema_types_new_by_name.insert("email"); result.schema_types_new_by_name.insert("fullSectionsType"); - EXPECT_THAT(schema_store->SetSchema(schema), + EXPECT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(result))); SchemaStoreStorageInfoProto storage_info = schema_store->GetStorageInfo(); @@ -1155,7 +1288,9 @@ TEST_F(SchemaStoreTest, GetDebugInfo) { // Set schema ASSERT_THAT( - schema_store->SetSchema(schema_), + schema_store->SetSchema(schema_, + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), IsOkAndHolds(EqualsSetSchemaResult(SchemaStore::SetSchemaResult{ .success = true, .schema_types_new_by_name = {schema_.types(0).schema_type()}}))); @@ -1191,7 +1326,9 @@ TEST_F(SchemaStoreTest, InitializeRegenerateDerivedFilesFailure) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("Type")) .Build(); - ICING_ASSERT_OK(schema_store->SetSchema(std::move(schema))); + ICING_ASSERT_OK(schema_store->SetSchema( + std::move(schema), /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); } auto mock_filesystem = std::make_unique<MockFilesystem>(); @@ -1226,7 +1363,9 @@ TEST_F(SchemaStoreTest, SetSchemaRegenerateDerivedFilesFailure) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); SchemaProto schema = SchemaBuilder().AddType(type).Build(); - ICING_ASSERT_OK(schema_store->SetSchema(std::move(schema))); + ICING_ASSERT_OK(schema_store->SetSchema( + std::move(schema), /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); } { @@ -1244,8 +1383,11 @@ TEST_F(SchemaStoreTest, SetSchemaRegenerateDerivedFilesFailure) { .AddType(type) .AddType(SchemaTypeConfigBuilder().SetType("Type2")) .Build(); - 
EXPECT_THAT(schema_store->SetSchema(std::move(schema)), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + EXPECT_THAT( + schema_store->SetSchema(std::move(schema), + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); DocumentProto document = DocumentBuilder() .SetSchema("Type") @@ -1273,6 +1415,1648 @@ TEST_F(SchemaStoreTest, SetSchemaRegenerateDerivedFilesFailure) { } } +TEST_F(SchemaStoreTest, CanCheckForPropertiesDefinedInSchema) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + + // Set it for the first time + SchemaStore::SetSchemaResult result; + result.success = true; + result.schema_types_new_by_name.insert(schema_.types(0).schema_type()); + + // Don't use schema_ defined in the test suite, as we want to make sure that + // the test is written correctly without referring to what the suite has + // defined. 
+ SchemaProto schema = + SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty( + // Add an indexed property so we generate + // section metadata on it + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("timestamp") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + EXPECT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOkAndHolds(EqualsSetSchemaResult(result))); + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId schema_id, + schema_store->GetSchemaTypeId("email")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(schema_id, "subject")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(schema_id, "timestamp")); + EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(schema_id, "foobar")); +} + +TEST_F(SchemaStoreTest, GetSchemaTypeIdsWithChildren) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + + // Create a schema with the following inheritance relation: + // A + // / \ + // B E + // / \ + // C D + // | + // F + SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder().SetType("D").AddParentType("B").Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder().SetType("E").AddParentType("A").Build(); + SchemaTypeConfigProto type_f = + SchemaTypeConfigBuilder().SetType("F").AddParentType("D").Build(); + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + 
.AddType(type_b) + .AddType(type_c) + .AddType(type_d) + .AddType(type_e) + .AddType(type_f) + .Build(); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + // Get schema type id for each type. + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_a_id, + schema_store->GetSchemaTypeId("A")); + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_b_id, + schema_store->GetSchemaTypeId("B")); + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_c_id, + schema_store->GetSchemaTypeId("C")); + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_d_id, + schema_store->GetSchemaTypeId("D")); + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_e_id, + schema_store->GetSchemaTypeId("E")); + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_f_id, + schema_store->GetSchemaTypeId("F")); + + // Check the results from GetSchemaTypeIdsWithChildren + EXPECT_THAT( + schema_store->GetSchemaTypeIdsWithChildren("A"), + IsOkAndHolds(Pointee(UnorderedElementsAre( + type_a_id, type_b_id, type_c_id, type_d_id, type_e_id, type_f_id)))); + EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("B"), + IsOkAndHolds(Pointee(UnorderedElementsAre( + type_b_id, type_c_id, type_d_id, type_f_id)))); + EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("C"), + IsOkAndHolds(Pointee(UnorderedElementsAre(type_c_id)))); + EXPECT_THAT( + schema_store->GetSchemaTypeIdsWithChildren("D"), + IsOkAndHolds(Pointee(UnorderedElementsAre(type_d_id, type_f_id)))); + EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("E"), + IsOkAndHolds(Pointee(UnorderedElementsAre(type_e_id)))); + EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("F"), + IsOkAndHolds(Pointee(UnorderedElementsAre(type_f_id)))); +} + +TEST_F(SchemaStoreTest, DiamondGetSchemaTypeIdsWithChildren) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + + // Create a schema with the 
following inheritance relation: + // A + // / \ + // B E + // / \ / + // C D + // \ / + // F + SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build(); + SchemaTypeConfigProto type_d = SchemaTypeConfigBuilder() + .SetType("D") + .AddParentType("B") + .AddParentType("E") + .Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder().SetType("E").AddParentType("A").Build(); + SchemaTypeConfigProto type_f = SchemaTypeConfigBuilder() + .SetType("F") + .AddParentType("C") + .AddParentType("D") + .Build(); + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + .AddType(type_b) + .AddType(type_c) + .AddType(type_d) + .AddType(type_e) + .AddType(type_f) + .Build(); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + // Get schema type id for each type. 
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_a_id, + schema_store->GetSchemaTypeId("A")); + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_b_id, + schema_store->GetSchemaTypeId("B")); + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_c_id, + schema_store->GetSchemaTypeId("C")); + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_d_id, + schema_store->GetSchemaTypeId("D")); + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_e_id, + schema_store->GetSchemaTypeId("E")); + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId type_f_id, + schema_store->GetSchemaTypeId("F")); + + // Check the results from GetSchemaTypeIdsWithChildren + EXPECT_THAT( + schema_store->GetSchemaTypeIdsWithChildren("A"), + IsOkAndHolds(Pointee(UnorderedElementsAre( + type_a_id, type_b_id, type_c_id, type_d_id, type_e_id, type_f_id)))); + EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("B"), + IsOkAndHolds(Pointee(UnorderedElementsAre( + type_b_id, type_c_id, type_d_id, type_f_id)))); + EXPECT_THAT( + schema_store->GetSchemaTypeIdsWithChildren("C"), + IsOkAndHolds(Pointee(UnorderedElementsAre(type_c_id, type_f_id)))); + EXPECT_THAT( + schema_store->GetSchemaTypeIdsWithChildren("D"), + IsOkAndHolds(Pointee(UnorderedElementsAre(type_d_id, type_f_id)))); + EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("E"), + IsOkAndHolds(Pointee( + UnorderedElementsAre(type_e_id, type_d_id, type_f_id)))); + EXPECT_THAT(schema_store->GetSchemaTypeIdsWithChildren("F"), + IsOkAndHolds(Pointee(UnorderedElementsAre(type_f_id)))); +} + +TEST_F(SchemaStoreTest, IndexableFieldsAreDefined) { + SchemaTypeConfigProto email_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("senderQualifiedId") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, + 
/*propagate_delete=*/true) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("recipients") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty(PropertyConfigBuilder() + .SetName("recipientIds") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty(PropertyConfigBuilder() + .SetName("timestamp") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_REQUIRED)) + .Build(); + + SchemaProto schema = SchemaBuilder().AddType(email_type).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/true)); + constexpr SchemaTypeId kTypeEmailSchemaId = 0; + + // Indexables. + EXPECT_TRUE( + schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "subject")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, + "senderQualifiedId")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, + "recipients")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, + "recipientIds")); + EXPECT_TRUE( + schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "timestamp")); +} + +TEST_F(SchemaStoreTest, JoinableFieldsAreDefined) { + SchemaTypeConfigProto email_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("tagQualifiedId") + .SetDataType(TYPE_STRING) + .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, + /*propagate_delete=*/true) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("senderQualifiedId") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, + /*propagate_delete=*/true) 
+ .SetCardinality(CARDINALITY_REQUIRED)) + .Build(); + + SchemaProto schema = SchemaBuilder().AddType(email_type).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/true)); + constexpr SchemaTypeId kTypeEmailSchemaId = 0; + + // Joinables. + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, + "tagQualifiedId")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, + "senderQualifiedId")); +} + +TEST_F(SchemaStoreTest, NonIndexableFieldsAreDefined) { + SchemaTypeConfigProto email_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("attachment") + .SetDataType(TYPE_BYTES) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("nonindexableInteger") + .SetDataType(TYPE_INT64) + .SetCardinality(CARDINALITY_REQUIRED)) + .Build(); + + SchemaProto schema = SchemaBuilder().AddType(email_type).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/true)); + constexpr SchemaTypeId kTypeEmailSchemaId = 0; + + // Non-indexables. 
+ EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, + "attachment")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, + "nonindexableInteger")); + EXPECT_TRUE( + schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "text")); +} + +TEST_F(SchemaStoreTest, NonExistentFieldsAreUndefined) { + SchemaTypeConfigProto email_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("senderQualifiedId") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, + /*propagate_delete=*/true) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("timestamp") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("nonindexableInteger") + .SetDataType(TYPE_INT64) + .SetCardinality(CARDINALITY_REQUIRED)) + .Build(); + + SchemaProto schema = SchemaBuilder().AddType(email_type).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/true)); + constexpr SchemaTypeId kTypeEmailSchemaId = 0; + + // Non-existents. 
+ EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "foobar")); + EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, + "timestamp.foo")); + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeEmailSchemaId, "time")); +} + +TEST_F(SchemaStoreTest, NestedIndexableFieldsAreDefined) { + SchemaTypeConfigProto email_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("tagQualifiedId") + .SetDataType(TYPE_STRING) + .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, + /*propagate_delete=*/true) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("timestamp") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_REQUIRED)) + .Build(); + + SchemaTypeConfigProto conversation_type = + SchemaTypeConfigBuilder() + .SetType("Conversation") + .AddProperty(PropertyConfigBuilder() + .SetName("emails") + .SetDataTypeDocument( + "Email", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("nestedNonIndexable") + .SetDataTypeDocument("Email", + /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema = + SchemaBuilder().AddType(email_type).AddType(conversation_type).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/true)); + constexpr SchemaTypeId 
kTypeConversationSchemaId = 1; + + // Indexables. + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId, + "emails.subject")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId, + "emails.timestamp")); +} + +TEST_F(SchemaStoreTest, NestedJoinableFieldsAreDefined) { + SchemaTypeConfigProto email_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("tagQualifiedId") + .SetDataType(TYPE_STRING) + .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, + /*propagate_delete=*/true) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("timestamp") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_REQUIRED)) + .Build(); + + SchemaTypeConfigProto conversation_type = + SchemaTypeConfigBuilder() + .SetType("Conversation") + .AddProperty(PropertyConfigBuilder() + .SetName("emails") + .SetDataTypeDocument( + "Email", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("nestedNonIndexable") + .SetDataTypeDocument("Email", + /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema = + SchemaBuilder().AddType(email_type).AddType(conversation_type).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/true)); + constexpr SchemaTypeId 
kTypeConversationSchemaId = 1; + + // Joinables. + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId, + "emails.tagQualifiedId")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema( + kTypeConversationSchemaId, "nestedNonIndexable.tagQualifiedId")); +} + +TEST_F(SchemaStoreTest, NestedNonIndexableFieldsAreDefined) { + SchemaTypeConfigProto email_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("tagQualifiedId") + .SetDataType(TYPE_STRING) + .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, + /*propagate_delete=*/true) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("timestamp") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_REQUIRED)) + .Build(); + + SchemaTypeConfigProto conversation_type = + SchemaTypeConfigBuilder() + .SetType("Conversation") + .AddProperty(PropertyConfigBuilder() + .SetName("emails") + .SetDataTypeDocument( + "Email", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("nestedNonIndexable") + .SetDataTypeDocument("Email", + /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema = + SchemaBuilder().AddType(email_type).AddType(conversation_type).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/true)); + constexpr 
SchemaTypeId kTypeConversationSchemaId = 1; + + // Non-indexables. + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId, + "emails.text")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema( + kTypeConversationSchemaId, "nestedNonIndexable.subject")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema( + kTypeConversationSchemaId, "nestedNonIndexable.text")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema( + kTypeConversationSchemaId, "nestedNonIndexable.timestamp")); +} + +TEST_F(SchemaStoreTest, NestedNonExistentFieldsAreUndefined) { + SchemaTypeConfigProto email_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("tagQualifiedId") + .SetDataType(TYPE_STRING) + .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, + /*propagate_delete=*/true) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("timestamp") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_REQUIRED)) + .Build(); + + SchemaTypeConfigProto conversation_type = + SchemaTypeConfigBuilder() + .SetType("Conversation") + .AddProperty(PropertyConfigBuilder() + .SetName("emails") + .SetDataTypeDocument( + "Email", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("nestedNonIndexable") + .SetDataTypeDocument("Email", + /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema = + SchemaBuilder().AddType(email_type).AddType(conversation_type).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> 
schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/true)); + constexpr SchemaTypeId kTypeConversationSchemaId = 1; + + // Non-existents. + EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema( + kTypeConversationSchemaId, "emails.foobar")); + EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema( + kTypeConversationSchemaId, "nestedNonIndexable.foobar")); + EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema( + kTypeConversationSchemaId, "emails.timestamp.foo")); + EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema( + kTypeConversationSchemaId, "emails.time")); +} + +TEST_F(SchemaStoreTest, IntermediateDocumentPropertiesAreDefined) { + SchemaTypeConfigProto email_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("tagQualifiedId") + .SetDataType(TYPE_STRING) + .SetJoinable(JOINABLE_VALUE_TYPE_QUALIFIED_ID, + /*propagate_delete=*/true) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("timestamp") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_REQUIRED)) + .Build(); + + SchemaTypeConfigProto conversation_type = + SchemaTypeConfigBuilder() + .SetType("Conversation") + .AddProperty(PropertyConfigBuilder() + .SetName("emails") + .SetDataTypeDocument( + "Email", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("nestedNonIndexable") + .SetDataTypeDocument("Email", + 
/*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema = + SchemaBuilder().AddType(email_type).AddType(conversation_type).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/true)); + constexpr SchemaTypeId kTypeConversationSchemaId = 1; + + // Intermediate documents props. + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId, + "emails")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeConversationSchemaId, + "nestedNonIndexable")); +} + +TEST_F(SchemaStoreTest, CyclePathsAreDefined) { + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetDataTypeDocument("B", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetDataTypeDocument("A", /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + 
/*allow_circular_schema_definitions=*/true)); + constexpr SchemaTypeId kTypeASchemaId = 0; + constexpr SchemaTypeId kTypeBSchemaId = 1; + + // A's top-level properties + EXPECT_TRUE( + schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "subject")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b")); + + // A's nested properties in B + EXPECT_TRUE( + schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.body")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a")); + + // A's nested properties in B's nested property in A + EXPECT_TRUE( + schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.subject")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.b")); + + // B's top-level properties + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "body")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a")); + + // B's nested properties in A + EXPECT_TRUE( + schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.subject")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b")); + + // B's nested properties in A's nested property in B + EXPECT_TRUE( + schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.body")); + EXPECT_TRUE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.a")); +} + +TEST_F(SchemaStoreTest, WrongTypeCyclePathsAreUndefined) { + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetDataTypeDocument("B", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + 
.SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetDataTypeDocument("A", /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/true)); + constexpr SchemaTypeId kTypeASchemaId = 0; + constexpr SchemaTypeId kTypeBSchemaId = 1; + + // The same paths as above, but we check the wrong types instead. + // A's top-level properties + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "subject")); + EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b")); + + // A's nested properties in B + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b.body")); + EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b.a")); + + // A's nested properties in B's nested property in A + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b.a.subject")); + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "b.a.b")); + + // B's top-level properties + EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "body")); + EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a")); + + // B's nested properties in A + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a.subject")); + EXPECT_FALSE(schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a.b")); + + // B's nested properties in A's nested property in B + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a.b.body")); + EXPECT_FALSE( + 
schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "a.b.a")); +} + +TEST_F(SchemaStoreTest, CyclePathsNonexistentPropertiesAreUndefined) { + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetDataTypeDocument("B", /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetDataTypeDocument("A", /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/true)); + constexpr SchemaTypeId kTypeASchemaId = 0; + constexpr SchemaTypeId kTypeBSchemaId = 1; + + // Undefined paths in A + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.subject")); + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.body")); + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.a")); + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeASchemaId, "b.a.subject.b")); + + // Undefined paths in B + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.body")); + EXPECT_FALSE( + 
schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.subject")); + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.b")); + EXPECT_FALSE( + schema_store->IsPropertyDefinedInSchema(kTypeBSchemaId, "a.b.body.a")); +} + +TEST_F(SchemaStoreTest, LoadsOverlaySchemaOnInit) { + // Create a schema that is rollback incompatible and will trigger us to create + // an overlay schema. + PropertyConfigBuilder indexed_string_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN); + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("type_b") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + + { + // Create an instance of the schema store and set the schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + { + // Create a new of the schema store and check that the same schema is + // present. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + + // The overlay should exist + std::string overlay_schema_path = schema_store_dir_ + "/overlay_schema.pb"; + ASSERT_TRUE(filesystem_.FileExists(overlay_schema_path.c_str())); + + // The base schema should hold a compatible schema + SchemaTypeConfigProto modified_type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .AddProperty(PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_STRING)) + .Build(); + SchemaProto expected_base_schema = + SchemaBuilder().AddType(modified_type_a).AddType(type_b).Build(); + std::string base_schema_path = schema_store_dir_ + "/schema.pb"; + auto base_schema_file_ = std::make_unique<FileBackedProto<SchemaProto>>( + filesystem_, base_schema_path); + ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* base_schema, + base_schema_file_->Read()); + EXPECT_THAT(*base_schema, EqualsProto(expected_base_schema)); + } +} + +TEST_F(SchemaStoreTest, LoadsBaseSchemaWithNoOverlayOnInit) { + // Create a normal schema that won't require an overlay. 
+ PropertyConfigBuilder indexed_string_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN); + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("type_b") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + + { + // Create an instance of the schema store and set the schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + { + // Create a new instance of the schema store and check that the same schema + // is present. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + + // Additionally, the overlay should not exist + std::string overlay_schema_path = schema_store_dir_ + "/overlay_schema.pb"; + ASSERT_FALSE(filesystem_.FileExists(overlay_schema_path.c_str())); + } +} + +TEST_F(SchemaStoreTest, LoadSchemaBackupSchemaMissing) { + // Create a schema that is rollback incompatible and will trigger us to create + // a backup schema. 
+ PropertyConfigBuilder indexed_string_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN); + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("type_b") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + + { + // Create an instance of the schema store and set the schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + // Delete the backup schema. + std::string backup_schema_path = schema_store_dir_ + "/schema.pb"; + ASSERT_TRUE(filesystem_.DeleteFile(backup_schema_path.c_str())); + + { + // Create a new instance of the schema store and check that it fails because + // the backup schema is not available. + EXPECT_THAT( + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + } +} + +TEST_F(SchemaStoreTest, LoadSchemaOverlaySchemaMissing) { + // Create a schema that is rollback incompatible and will trigger us to create + // a backup schema. 
+ PropertyConfigBuilder indexed_string_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN); + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("type_b") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + + { + // Create an instance of the schema store and set the schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + // Delete the overlay schema. + std::string overlay_schema_path = schema_store_dir_ + "/overlay_schema.pb"; + ASSERT_TRUE(filesystem_.DeleteFile(overlay_schema_path.c_str())); + + { + // Create a new instance of the schema store and check that it fails because + // the overlay schema is not available when we expected it to be. + EXPECT_THAT( + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + } +} + +TEST_F(SchemaStoreTest, LoadSchemaHeaderMissing) { + // Create a schema that is rollback incompatible and will trigger us to create + // a backup schema. 
+ PropertyConfigBuilder indexed_string_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN); + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("type_b") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + + { + // Create an instance of the schema store and set the schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + // Delete the schema header. + std::string schema_header_path = schema_store_dir_ + "/schema_store_header"; + ASSERT_TRUE(filesystem_.DeleteFile(schema_header_path.c_str())); + + { + // Create a new instance of the schema store and check that creation fails + // with INTERNAL now that the schema header file has been deleted. + EXPECT_THAT( + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + } +} + +TEST_F(SchemaStoreTest, LoadSchemaNoOverlayHeaderMissing) { + // Create a normal schema that won't require a backup. 
+ PropertyConfigBuilder indexed_string_property_builder = + PropertyConfigBuilder() + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN); + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("type_b") + .AddProperty(indexed_string_property_builder.SetName("prop0")) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + + { + // Create an instance of the schema store and set the schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + // Delete the schema header. + std::string schema_header_path = schema_store_dir_ + "/schema_store_header"; + ASSERT_TRUE(filesystem_.DeleteFile(schema_header_path.c_str())); + + { + // Create a new instance of the schema store and check that it fails because + // the schema header (which is now a part of the ground truth) is not + // available. + EXPECT_THAT( + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + } +} + +TEST_F(SchemaStoreTest, MigrateSchemaCompatibleNoChange) { + // Create a schema that is rollback incompatible and will trigger us to create + // a backup schema. 
+ SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).Build(); + + { + // Create an instance of the schema store and set the schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + ICING_EXPECT_OK(SchemaStore::MigrateSchema( + &filesystem_, schema_store_dir_, version_util::StateChange::kCompatible, + version_util::kVersion)); + + { + // Create a new of the schema store and check that the same schema is + // present. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } +} + +TEST_F(SchemaStoreTest, MigrateSchemaUpgradeNoChange) { + // Create a schema that is rollback incompatible and will trigger us to create + // a backup schema. + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).Build(); + + { + // Create an instance of the schema store and set the schema. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + ICING_EXPECT_OK(SchemaStore::MigrateSchema( + &filesystem_, schema_store_dir_, version_util::StateChange::kUpgrade, + version_util::kVersion + 1)); + + { + // Create a new of the schema store and check that the same schema is + // present. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } +} + +TEST_F(SchemaStoreTest, MigrateSchemaVersionZeroUpgradeNoChange) { + // Because we are upgrading from version zero, the schema must be compatible + // with version zero. + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).Build(); + + { + // Create an instance of the schema store and set the schema. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + ICING_EXPECT_OK( + SchemaStore::MigrateSchema(&filesystem_, schema_store_dir_, + version_util::StateChange::kVersionZeroUpgrade, + version_util::kVersion + 1)); + + { + // Create a new instance of the schema store and check that the same schema + // is present. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } +} + +TEST_F(SchemaStoreTest, MigrateSchemaRollbackDiscardsOverlaySchema) { + // Create a schema that is rollback incompatible and will trigger us to create + // a backup schema. + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).Build(); + + { + // Create an instance of the schema store and set the schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + // Rollback to a version before kVersion. The schema header will declare that + // the overlay is compatible with any version starting with kVersion. 
So + // kVersion - 1 is incompatible and will throw out the schema. + ICING_EXPECT_OK(SchemaStore::MigrateSchema( + &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack, + version_util::kVersion - 1)); + + { + // Create a new instance of the schema store and check that we fell back to + // the base schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + + SchemaTypeConfigProto other_type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty(PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_STRING)) + .Build(); + SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build(); + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(base_schema)))); + } +} + +TEST_F(SchemaStoreTest, MigrateSchemaCompatibleRollbackKeepsOverlaySchema) { + // Create a schema that is rollback incompatible and will trigger us to create + // a backup schema. + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).Build(); + + { + // Create an instance of the schema store and set the schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + // Rollback to kVersion. The schema header will declare that the overlay is + // compatible with any version starting with kVersion. 
So we will be + // compatible and retain the overlay schema. + ICING_EXPECT_OK(SchemaStore::MigrateSchema( + &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack, + version_util::kVersion)); + + { + // Create a new of the schema store and check that the same schema is + // present. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } +} + +TEST_F(SchemaStoreTest, MigrateSchemaRollforwardRetainsBaseSchema) { + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).Build(); + { + // Create an instance of the schema store and set the schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + // Rollback to a version before kVersion. The schema header will declare that + // the overlay is compatible with any version starting with kVersion. So + // kVersion - 1 is incompatible and will throw out the schema. 
+ ICING_EXPECT_OK(SchemaStore::MigrateSchema( + &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack, + version_util::kVersion - 1)); + + SchemaTypeConfigProto other_type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty(PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_STRING)) + .Build(); + SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build(); + + { + // Create a new of the schema store and check that we fell back to the + // base schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(base_schema)))); + } + + // Now rollforward to a new version. This should accept whatever schema is + // present (currently base schema) + ICING_EXPECT_OK(SchemaStore::MigrateSchema( + &filesystem_, schema_store_dir_, version_util::StateChange::kRollForward, + version_util::kVersion)); + { + // Create a new of the schema store and check that we fell back to the + // base schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(base_schema)))); + } +} + +TEST_F(SchemaStoreTest, MigrateSchemaRollforwardRetainsOverlaySchema) { + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).Build(); + { + // Create an instance of the schema store and set the schema. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + // Rollback to kVersion. The schema header will declare that the overlay is + // compatible with any version starting with kVersion. So we will be + // compatible and retain the overlay schema. + ICING_EXPECT_OK(SchemaStore::MigrateSchema( + &filesystem_, schema_store_dir_, version_util::StateChange::kRollBack, + version_util::kVersion)); + + { + // Create a new of the schema store and check that the same schema is + // present. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + // Now rollforward to a new version. This should accept whatever schema is + // present (currently overlay schema) + ICING_EXPECT_OK(SchemaStore::MigrateSchema( + &filesystem_, schema_store_dir_, version_util::StateChange::kRollForward, + version_util::kVersion)); + { + // Create a new of the schema store and check that the same schema is + // present. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } +} + +TEST_F(SchemaStoreTest, + MigrateSchemaVersionZeroRollforwardDiscardsOverlaySchema) { + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).Build(); + { + // Create an instance of the schema store and set the schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + // A VersionZeroRollforward will always discard the overlay schema because it + // could be stale. + ICING_EXPECT_OK(SchemaStore::MigrateSchema( + &filesystem_, schema_store_dir_, + version_util::StateChange::kVersionZeroRollForward, + version_util::kVersion)); + + SchemaTypeConfigProto other_type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty(PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_STRING)) + .Build(); + SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build(); + + { + // Create a new of the schema store and check that we fell back to the + // base schema. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(base_schema)))); + } +} + +TEST_F(SchemaStoreTest, MigrateSchemaVersionUndeterminedDiscardsOverlaySchema) { + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty( + PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_RFC822)) + .Build(); + SchemaProto schema = SchemaBuilder().AddType(type_a).Build(); + { + // Create an instance of the schema store and set the schema. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(schema)))); + } + + // An Undetermined will always discard the overlay schema because it doesn't + // know which state we're in and so it falls back to the base schema because + // it should always be valid. + ICING_EXPECT_OK(SchemaStore::MigrateSchema( + &filesystem_, schema_store_dir_, version_util::StateChange::kUndetermined, + version_util::kVersion)); + + SchemaTypeConfigProto other_type_a = + SchemaTypeConfigBuilder() + .SetType("type_a") + .AddProperty(PropertyConfigBuilder() + .SetName("propRfc") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_STRING)) + .Build(); + SchemaProto base_schema = SchemaBuilder().AddType(other_type_a).Build(); + + { + // Create a new instance of the schema store and check that we fell back to + // the base schema. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + + EXPECT_THAT(schema_store->GetSchema(), + IsOkAndHolds(Pointee(EqualsProto(base_schema)))); + } +} + } // namespace } // namespace lib diff --git a/icing/schema/schema-type-manager.cc b/icing/schema/schema-type-manager.cc index 7882db5..f3a86d4 100644 --- a/icing/schema/schema-type-manager.cc +++ b/icing/schema/schema-type-manager.cc @@ -15,6 +15,7 @@ #include "icing/schema/schema-type-manager.h" #include <memory> +#include <utility> #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" diff --git a/icing/schema/schema-type-manager.h b/icing/schema/schema-type-manager.h index dc5f799..f2adbd9 100644 --- a/icing/schema/schema-type-manager.h +++ b/icing/schema/schema-type-manager.h @@ -16,6 +16,9 @@ #define ICING_SCHEMA_SCHEMA_TYPE_MANAGER_H_ #include <memory> +#include <string> +#include <unordered_set> +#include <vector> #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/schema/joinable-property-manager.h" @@ -30,6 +33,10 @@ namespace lib { // This class is a wrapper of SectionManager and JoinablePropertyManager. class SchemaTypeManager { public: + // Schema type ids are continuous, and so we use a vector instead of an + // unordered map for the mappings. + using SchemaTypeIdToPropertiesVector = + std::vector<std::unordered_set<std::string>>; // Factory function to create a SchemaTypeManager which does not take // ownership of any input components, and all pointers must refer to valid // objects that outlive the created SchemaTypeManager instance. 
diff --git a/icing/schema/schema-type-manager_test.cc b/icing/schema/schema-type-manager_test.cc index 93cbdee..eafc612 100644 --- a/icing/schema/schema-type-manager_test.cc +++ b/icing/schema/schema-type-manager_test.cc @@ -41,6 +41,7 @@ using ::testing::Pointee; // type and property names of EmailMessage static constexpr char kTypeEmail[] = "EmailMessage"; +static constexpr SchemaTypeId kTypeEmailSchemaId = 0; // indexable (in lexicographical order) static constexpr char kPropertyRecipientIds[] = "recipientIds"; static constexpr char kPropertyRecipients[] = "recipients"; @@ -57,6 +58,7 @@ static constexpr char kPropertyText[] = "text"; // type and property names of Conversation static constexpr char kTypeConversation[] = "Conversation"; +static constexpr SchemaTypeId kTypeConversationSchemaId = 1; // indexable (in lexicographical order) static constexpr char kPropertyEmails[] = "emails"; static constexpr char kPropertyGroupQualifiedId[] = @@ -208,8 +210,9 @@ TEST_F(SchemaTypeManagerTest, Create) { DynamicTrieKeyMapper<SchemaTypeId>::Create( filesystem_, test_dir_ + "/schema_type_mapper", /*maximum_size_bytes=*/3 * 128 * 1024)); - ICING_ASSERT_OK(schema_type_mapper->Put(kTypeEmail, 0)); - ICING_ASSERT_OK(schema_type_mapper->Put(kTypeConversation, 1)); + ICING_ASSERT_OK(schema_type_mapper->Put(kTypeEmail, kTypeEmailSchemaId)); + ICING_ASSERT_OK( + schema_type_mapper->Put(kTypeConversation, kTypeConversationSchemaId)); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaTypeManager> schema_type_manager, @@ -237,6 +240,7 @@ TEST_F(SchemaTypeManagerTest, Create) { EqualsSectionMetadata(/*expected_id=*/4, /*expected_property_path=*/"timestamp", CreateTimestampPropertyConfig()))))); + // In the Conversation type, "groupQualifiedId" and "name" are indexable // properties as are the indexable properties of the email in the "emails" // property. 
All properties of the email in the "nestedNonIndexable" property diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc index f3f7aad..c85cc87 100644 --- a/icing/schema/schema-util.cc +++ b/icing/schema/schema-util.cc @@ -14,6 +14,7 @@ #include "icing/schema/schema-util.h" +#include <algorithm> #include <cstdint> #include <queue> #include <string> @@ -21,13 +22,13 @@ #include <unordered_map> #include <unordered_set> #include <utility> +#include <vector> #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/absl_ports/annotate.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" #include "icing/absl_ports/str_join.h" -#include "icing/legacy/core/icing-string-util.h" #include "icing/proto/schema.pb.h" #include "icing/proto/term.pb.h" #include "icing/util/logging.h" @@ -158,97 +159,273 @@ void AddIncompatibleChangeToDelta( } } +// Returns if C1 <= C2 based on the following rule, where C1 and C2 are +// cardinalities that can be one of REPEATED, OPTIONAL, or REQUIRED. 
+// +// Rule: REQUIRED < OPTIONAL < REPEATED +bool CardinalityLessThanEq(PropertyConfigProto::Cardinality::Code C1, + PropertyConfigProto::Cardinality::Code C2) { + if (C1 == C2) { + return true; + } + if (C1 == PropertyConfigProto::Cardinality::REQUIRED) { + return C2 == PropertyConfigProto::Cardinality::OPTIONAL || + C2 == PropertyConfigProto::Cardinality::REPEATED; + } + if (C1 == PropertyConfigProto::Cardinality::OPTIONAL) { + return C2 == PropertyConfigProto::Cardinality::REPEATED; + } + return false; +} + } // namespace -libtextclassifier3::Status ExpandTranstiveDependents( - const SchemaUtil::DependentMap& dependent_map, std::string_view type, - SchemaUtil::DependentMap* expanded_dependent_map, +libtextclassifier3::Status CalculateTransitiveNestedTypeRelations( + const SchemaUtil::DependentMap& direct_nested_types_map, + const std::unordered_set<std::string_view>& joinable_types, + std::string_view type, bool path_contains_joinable_property, + SchemaUtil::DependentMap* expanded_nested_types_map, + std::unordered_map<std::string_view, bool>&& + pending_expansion_paths_indexable, + std::unordered_set<std::string_view>* sink_types) { + // TODO(b/280698121): Implement optimizations to this code to avoid reentering + // a node after it's already been expanded. + + auto itr = direct_nested_types_map.find(type); + if (itr == direct_nested_types_map.end()) { + // It's a sink node. Just return. + sink_types->insert(type); + return libtextclassifier3::Status::OK; + } + std::unordered_map<std::string_view, std::vector<const PropertyConfigProto*>> + expanded_relations; + + // Add all of the adjacent outgoing relations. + expanded_relations.reserve(itr->second.size()); + expanded_relations.insert(itr->second.begin(), itr->second.end()); + + // Iterate through each adjacent outgoing relation and add their indirect + // outgoing relations. 
+ for (const auto& [adjacent_type, adjacent_property_protos] : itr->second) { + // Make a copy of pending_expansion_paths_indexable for every iteration. + std::unordered_map<std::string_view, bool> pending_expansion_paths_copy( + pending_expansion_paths_indexable); + + // 1. Check the nested indexable config of the edge (type -> adjacent_type), + // and the joinable config of the current path up to adjacent_type. + // + // The nested indexable config is true if any of the PropertyConfigProtos + // representing the connecting edge has index_nested_properties=true. + bool is_edge_nested_indexable = std::any_of( + adjacent_property_protos.begin(), adjacent_property_protos.end(), + [](const PropertyConfigProto* property_config) { + return property_config->document_indexing_config() + .index_nested_properties(); + }); + // TODO(b/265304217): change this once we add joinable_properties_list. + // Check if addition of the new edge (type->adjacent_type) makes the path + // joinable. + bool new_path_contains_joinable_property = + joinable_types.count(type) > 0 || path_contains_joinable_property; + // Set is_nested_indexable field for the current edge + pending_expansion_paths_copy[type] = is_edge_nested_indexable; + + // If is_edge_nested_indexable=false, then all paths to adjacent_type + // currently in the pending_expansions map are also not nested indexable. + if (!is_edge_nested_indexable) { + for (auto& pending_expansion : pending_expansion_paths_copy) { + pending_expansion.second = false; + } + } + + // 2. Check if we're in the middle of expanding this type - IOW + // there's a cycle! + // + // This cycle is not allowed if either: + // 1. The cycle starting at adjacent_type is nested indexable, OR + // 2. The current path contains a joinable property. 
+ auto adjacent_itr = pending_expansion_paths_copy.find(adjacent_type); + if (adjacent_itr != pending_expansion_paths_copy.end()) { + if (adjacent_itr->second || new_path_contains_joinable_property) { + return absl_ports::InvalidArgumentError(absl_ports::StrCat( + "Invalid cycle detected in type configs. '", type, + "' references itself and is nested-indexable or nested-joinable.")); + } + // The cycle is allowed and there's no need to keep iterating the loop. + // Move on to the next adjacent value. + continue; + } + + // 3. Expand this type as needed. + ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations( + direct_nested_types_map, joinable_types, adjacent_type, + new_path_contains_joinable_property, expanded_nested_types_map, + std::move(pending_expansion_paths_copy), sink_types)); + if (sink_types->count(adjacent_type) > 0) { + // "adjacent" is a sink node. Just skip to the next. + continue; + } + + // 4. "adjacent" has been fully expanded. Add all of its transitive + // outgoing relations to this type's transitive outgoing relations. + auto adjacent_expanded_itr = expanded_nested_types_map->find(adjacent_type); + expanded_relations.reserve(expanded_relations.size() + + adjacent_expanded_itr->second.size()); + for (const auto& [transitive_reachable, _] : + adjacent_expanded_itr->second) { + // Insert a transitive reachable node `transitive_reachable` for `type` if + // it wasn't previously reachable. + // Since there is no direct edge between `type` and `transitive_reachable` + // we insert an empty vector into the dependent map. 
+ expanded_relations.insert({transitive_reachable, {}}); + } + } + for (const auto& kvp : expanded_relations) { + expanded_nested_types_map->operator[](type).insert(kvp); + } + return libtextclassifier3::Status::OK; +} + +template <typename T> +libtextclassifier3::Status CalculateAcyclicTransitiveRelations( + const SchemaUtil::TypeRelationMap<T>& direct_relation_map, + std::string_view type, + SchemaUtil::TypeRelationMap<T>* expanded_relation_map, std::unordered_set<std::string_view>* pending_expansions, - std::unordered_set<std::string_view>* orphaned_types) { - auto expanded_itr = expanded_dependent_map->find(type); - if (expanded_itr != expanded_dependent_map->end()) { + std::unordered_set<std::string_view>* sink_types) { + auto expanded_itr = expanded_relation_map->find(type); + if (expanded_itr != expanded_relation_map->end()) { // We've already expanded this type. Just return. return libtextclassifier3::Status::OK; } - auto itr = dependent_map.find(type); - if (itr == dependent_map.end()) { - // It's an orphan. Just return. - orphaned_types->insert(type); + auto itr = direct_relation_map.find(type); + if (itr == direct_relation_map.end()) { + // It's a sink node. Just return. + sink_types->insert(type); return libtextclassifier3::Status::OK; } pending_expansions->insert(type); - std::unordered_map<std::string_view, std::vector<const PropertyConfigProto*>> - expanded_dependents; + std::unordered_map<std::string_view, T> expanded_relations; - // Add all of the direct dependents. - expanded_dependents.reserve(itr->second.size()); - expanded_dependents.insert(itr->second.begin(), itr->second.end()); + // Add all of the adjacent outgoing relations. + expanded_relations.reserve(itr->second.size()); + expanded_relations.insert(itr->second.begin(), itr->second.end()); - // Iterate through each direct dependent and add their indirect dependents. 
- for (const auto& [dep, _] : itr->second) { + // Iterate through each adjacent outgoing relation and add their indirect + // outgoing relations. + for (const auto& [adjacent, _] : itr->second) { // 1. Check if we're in the middle of expanding this type - IOW there's a // cycle! - if (pending_expansions->count(dep) > 0) { + if (pending_expansions->count(adjacent) > 0) { return absl_ports::InvalidArgumentError( - absl_ports::StrCat("Infinite loop detected in type configs. '", type, - "' references itself.")); + absl_ports::StrCat("Invalid cycle detected in type configs. '", type, + "' references or inherits from itself.")); } // 2. Expand this type as needed. - ICING_RETURN_IF_ERROR( - ExpandTranstiveDependents(dependent_map, dep, expanded_dependent_map, - pending_expansions, orphaned_types)); - if (orphaned_types->count(dep) > 0) { - // Dep is an orphan. Just skip to the next dep. + ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations( + direct_relation_map, adjacent, expanded_relation_map, + pending_expansions, sink_types)); + if (sink_types->count(adjacent) > 0) { + // "adjacent" is a sink node. Just skip to the next. continue; } - // 3. Dep has been fully expanded. Add all of its dependents to this - // type's dependents. - auto dep_expanded_itr = expanded_dependent_map->find(dep); - expanded_dependents.reserve(expanded_dependents.size() + - dep_expanded_itr->second.size()); - for (const auto& [dep_dependent, _] : dep_expanded_itr->second) { - // Insert a transitive dependent `dep_dependent` for `type`. Also since - // there is no direct edge between `type` and `dep_dependent`, the direct - // edge (i.e. PropertyConfigProto*) vector is empty. - expanded_dependents.insert({dep_dependent, {}}); + // 3. "adjacent" has been fully expanded. Add all of its transitive outgoing + // relations to this type's transitive outgoing relations. 
+ auto adjacent_expanded_itr = expanded_relation_map->find(adjacent); + expanded_relations.reserve(expanded_relations.size() + + adjacent_expanded_itr->second.size()); + for (const auto& [transitive_reachable, _] : + adjacent_expanded_itr->second) { + // Insert a transitive reachable node `transitive_reachable` for `type`. + // Also since there is no direct edge between `type` and + // `transitive_reachable`, the direct edge is initialized by default. + expanded_relations.insert({transitive_reachable, T()}); } } - expanded_dependent_map->insert({type, std::move(expanded_dependents)}); + expanded_relation_map->insert({type, std::move(expanded_relations)}); pending_expansions->erase(type); return libtextclassifier3::Status::OK; } -// Calculate and return the transitive closure of dependent_map, which expands -// the dependent_map to also include indirect dependents +// Calculate and return the expanded nested-type map from +// direct_nested_type_map. This expands the direct_nested_type_map to also +// include indirect nested-type relations. // -// Ex. Suppose we have a schema with three types A, B and C, and we have the -// following dependent relationship. +// Ex. Suppose we have the following relations in direct_nested_type_map. // -// C -> B (B depends on C) -// B -> A (A depends on B) +// C -> B (Schema type B has a document property of type C) +// B -> A (Schema type A has a document property of type B) // // Then, this function would expand the map by adding C -> A to the map. libtextclassifier3::StatusOr<SchemaUtil::DependentMap> -ExpandTranstiveDependents(const SchemaUtil::DependentMap& dependent_map) { - SchemaUtil::DependentMap expanded_dependent_map; +CalculateTransitiveNestedTypeRelations( + const SchemaUtil::DependentMap& direct_nested_type_map, + const std::unordered_set<std::string_view>& joinable_types, + bool allow_circular_schema_definitions) { + SchemaUtil::DependentMap expanded_nested_type_map; + // Types that have no outgoing relations. 
+ std::unordered_set<std::string_view> sink_types; + + if (allow_circular_schema_definitions) { + // Map of nodes that are pending expansion -> whether the path from each key + // node to the 'current' node is nested_indexable. + // A copy of this map is made for each new node that we expand. + std::unordered_map<std::string_view, bool> + pending_expansion_paths_indexable; + for (const auto& kvp : direct_nested_type_map) { + ICING_RETURN_IF_ERROR(CalculateTransitiveNestedTypeRelations( + direct_nested_type_map, joinable_types, kvp.first, + /*path_contains_joinable_property=*/false, &expanded_nested_type_map, + std::unordered_map<std::string_view, bool>( + pending_expansion_paths_indexable), + &sink_types)); + } + } else { + // If allow_circular_schema_definitions is false, then fall back to the old + // way of detecting cycles. + // Types that we are expanding. + std::unordered_set<std::string_view> pending_expansions; + for (const auto& kvp : direct_nested_type_map) { + ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations( + direct_nested_type_map, kvp.first, &expanded_nested_type_map, + &pending_expansions, &sink_types)); + } + } + return expanded_nested_type_map; +} + +// Calculate and return the expanded inheritance map from +// direct_inheritance_map. This expands the direct_inheritance_map to also +// include indirect inheritance relations. +// +// Ex. Suppose we have the following relations in direct_inheritance_map. +// +// C -> B (Schema type C is B's parent_type) +// B -> A (Schema type B is A's parent_type) +// +// Then, this function would expand the map by adding C -> A to the map. +libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap> +CalculateTransitiveInheritanceRelations( + const SchemaUtil::InheritanceMap& direct_inheritance_map) { + SchemaUtil::InheritanceMap expanded_inheritance_map; // Types that we are expanding. std::unordered_set<std::string_view> pending_expansions; - // Types that have no dependents. 
- std::unordered_set<std::string_view> orphaned_types; - for (const auto& kvp : dependent_map) { - ICING_RETURN_IF_ERROR(ExpandTranstiveDependents( - dependent_map, kvp.first, &expanded_dependent_map, &pending_expansions, - &orphaned_types)); + // Types that have no outgoing relation. + std::unordered_set<std::string_view> sink_types; + for (const auto& kvp : direct_inheritance_map) { + ICING_RETURN_IF_ERROR(CalculateAcyclicTransitiveRelations( + direct_inheritance_map, kvp.first, &expanded_inheritance_map, + &pending_expansions, &sink_types)); } - return expanded_dependent_map; + return expanded_inheritance_map; } -// Builds a transitive dependent map. 'Orphaned' types (types with no -// dependents) will not be present in the map. +// Builds a transitive dependent map. Types with no dependents will not be +// present in the map as keys. // // Ex. Suppose we have a schema with four types A, B, C, D. A has a property of // type B and B has a property of type C. C and D only have non-document @@ -258,7 +435,7 @@ ExpandTranstiveDependents(const SchemaUtil::DependentMap& dependent_map) { // C -> A, B (both A and B depend on C) // B -> A (A depends on B) // -// A and D would be considered orphaned properties because no type refers to +// A and D will not be present in the map as keys because no type depends on // them. // // RETURNS: @@ -266,8 +443,21 @@ ExpandTranstiveDependents(const SchemaUtil::DependentMap& dependent_map) { // INVALID_ARGUMENT if the schema contains a cycle or an undefined type. // ALREADY_EXISTS if a schema type is specified more than once in the schema libtextclassifier3::StatusOr<SchemaUtil::DependentMap> -BuildTransitiveDependentGraph(const SchemaProto& schema) { - SchemaUtil::DependentMap dependent_map; +BuildTransitiveDependentGraph(const SchemaProto& schema, + bool allow_circular_schema_definitions) { + // We expand the nested-type dependent map and inheritance map differently + // when calculating transitive relations. 
These two types of relations also + // should not be transitive so we keep these as separate maps. + // + // e.g. For schema type A, B and C, B depends on A through inheritance, and + // C depends on B by having a property with type B, we will have the two + // relations {A, B} and {B, C} in the dependent map, but will not have {A, C} + // in the map. + SchemaUtil::DependentMap direct_nested_type_map; + SchemaUtil::InheritanceMap direct_inheritance_map; + + // Set of schema types that have at least one joinable property. + std::unordered_set<std::string_view> joinable_types; // Add all first-order dependents. std::unordered_set<std::string_view> known_types; @@ -280,16 +470,19 @@ BuildTransitiveDependentGraph(const SchemaProto& schema) { } known_types.insert(schema_type); unknown_types.erase(schema_type); - if (!type_config.parent_type().empty()) { - std::string_view parent_schema_type(type_config.parent_type()); + // Insert inheritance relations into the inheritance map. + for (std::string_view parent_schema_type : type_config.parent_types()) { if (known_types.count(parent_schema_type) == 0) { unknown_types.insert(parent_schema_type); } - // Try to add schema_type to the parent type's dependent map when it is - // not present already, in which case the value will be an empty vector. - dependent_map[parent_schema_type].insert({schema_type, {}}); + direct_inheritance_map[parent_schema_type][schema_type] = true; } for (const auto& property_config : type_config.properties()) { + if (property_config.joinable_config().value_type() != + JoinableConfig::ValueType::NONE) { + joinable_types.insert(schema_type); + } + // Insert nested-type relations into the nested-type map. 
if (property_config.data_type() == PropertyConfigProto::DataType::DOCUMENT) { // Need to know what schema_type these Document properties should be @@ -298,7 +491,7 @@ BuildTransitiveDependentGraph(const SchemaProto& schema) { if (known_types.count(property_schema_type) == 0) { unknown_types.insert(property_schema_type); } - dependent_map[property_schema_type][schema_type].push_back( + direct_nested_type_map[property_schema_type][schema_type].push_back( &property_config); } } @@ -307,15 +500,50 @@ BuildTransitiveDependentGraph(const SchemaProto& schema) { return absl_ports::InvalidArgumentError(absl_ports::StrCat( "Undefined 'schema_type's: ", absl_ports::StrJoin(unknown_types, ","))); } - return ExpandTranstiveDependents(dependent_map); + + // Merge two expanded maps into a single dependent_map, without making + // inheritance and nested-type relations transitive. + ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap merged_dependent_map, + CalculateTransitiveNestedTypeRelations( + direct_nested_type_map, joinable_types, + allow_circular_schema_definitions)); + ICING_ASSIGN_OR_RETURN( + SchemaUtil::InheritanceMap expanded_inheritance_map, + CalculateTransitiveInheritanceRelations(direct_inheritance_map)); + for (const auto& [parent_type, inheritance_relation] : + expanded_inheritance_map) { + // Insert the parent_type into the dependent map if it is not present + // already. + merged_dependent_map.insert({parent_type, {}}); + merged_dependent_map[parent_type].reserve(inheritance_relation.size()); + for (const auto& [child_type, _] : inheritance_relation) { + // Insert the child_type into parent_type's dependent map if it's not + // present already, in which case the value will be an empty vector. 
+ merged_dependent_map[parent_type].insert({child_type, {}}); + } + } + return merged_dependent_map; +} + +libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap> +SchemaUtil::BuildTransitiveInheritanceGraph(const SchemaProto& schema) { + SchemaUtil::InheritanceMap direct_inheritance_map; + for (const auto& type_config : schema.types()) { + for (std::string_view parent_schema_type : type_config.parent_types()) { + direct_inheritance_map[parent_schema_type][type_config.schema_type()] = + true; + } + } + return CalculateTransitiveInheritanceRelations(direct_inheritance_map); } libtextclassifier3::StatusOr<SchemaUtil::DependentMap> SchemaUtil::Validate( - const SchemaProto& schema) { + const SchemaProto& schema, bool allow_circular_schema_definitions) { // 1. Build the dependent map. This will detect any cycles, non-existent or // duplicate types in the schema. - ICING_ASSIGN_OR_RETURN(SchemaUtil::DependentMap dependent_map, - BuildTransitiveDependentGraph(schema)); + ICING_ASSIGN_OR_RETURN( + SchemaUtil::DependentMap dependent_map, + BuildTransitiveDependentGraph(schema, allow_circular_schema_definitions)); // Tracks PropertyConfigs within a SchemaTypeConfig that we've validated // already. @@ -422,6 +650,9 @@ libtextclassifier3::StatusOr<SchemaUtil::DependentMap> SchemaUtil::Validate( } } + // Verify that every child type's property set has included all compatible + // properties from parent types. 
+ ICING_RETURN_IF_ERROR(ValidateInheritedProperties(schema)); return dependent_map; } @@ -537,6 +768,100 @@ libtextclassifier3::Status SchemaUtil::ValidateJoinableConfig( return libtextclassifier3::Status::OK; } +bool SchemaUtil::IsParent(const SchemaUtil::InheritanceMap& inheritance_map, + std::string_view parent_type, + std::string_view child_type) { + auto iter = inheritance_map.find(parent_type); + if (iter == inheritance_map.end()) { + return false; + } + return iter->second.count(child_type) > 0; +} + +bool SchemaUtil::IsInheritedPropertyCompatible( + const SchemaUtil::InheritanceMap& inheritance_map, + const PropertyConfigProto& child_property_config, + const PropertyConfigProto& parent_property_config) { + // Check if child_property_config->cardinality() <= + // parent_property_config->cardinality(). + // Subtype may require a stricter cardinality, but cannot loosen cardinality + // requirements. + if (!CardinalityLessThanEq(child_property_config.cardinality(), + parent_property_config.cardinality())) { + return false; + } + + // Now we can assume T1 and T2 are not nullptr, and cardinality check passes. + if (child_property_config.data_type() != + PropertyConfigProto::DataType::DOCUMENT || + parent_property_config.data_type() != + PropertyConfigProto::DataType::DOCUMENT) { + return child_property_config.data_type() == + parent_property_config.data_type(); + } + + // Now we can assume T1 and T2 are both document type. 
+ return child_property_config.schema_type() == + parent_property_config.schema_type() || + IsParent(inheritance_map, parent_property_config.schema_type(), + child_property_config.schema_type()); +} + +libtextclassifier3::Status SchemaUtil::ValidateInheritedProperties( + const SchemaProto& schema) { + // Create an inheritance map + ICING_ASSIGN_OR_RETURN(SchemaUtil::InheritanceMap inheritance_map, + BuildTransitiveInheritanceGraph(schema)); + + // Create a map that maps from type name to property names, and then from + // property names to PropertyConfigProto. + std::unordered_map< + std::string, std::unordered_map<std::string, const PropertyConfigProto*>> + property_map; + for (const SchemaTypeConfigProto& type_config : schema.types()) { + // Skip building entries for types without any child or parent, since + // such entries will never be used. + if (type_config.parent_types().empty() && + inheritance_map.count(type_config.schema_type()) == 0) { + continue; + } + auto& curr_property_map = property_map[type_config.schema_type()]; + for (const PropertyConfigProto& property_config : + type_config.properties()) { + curr_property_map[property_config.property_name()] = &property_config; + } + } + + // Validate child properties. 
+ for (const SchemaTypeConfigProto& type_config : schema.types()) { + const std::string& child_type_name = type_config.schema_type(); + auto& child_property_map = property_map[child_type_name]; + + for (const std::string& parent_type_name : type_config.parent_types()) { + auto& parent_property_map = property_map[parent_type_name]; + + for (const auto& [property_name, parent_property_config] : + parent_property_map) { + auto child_property_iter = child_property_map.find(property_name); + if (child_property_iter == child_property_map.end()) { + return absl_ports::InvalidArgumentError(absl_ports::StrCat( + "Property ", property_name, " is not present in child type ", + child_type_name, ", but it is defined in the parent type ", + parent_type_name, ".")); + } + if (!IsInheritedPropertyCompatible(inheritance_map, + *child_property_iter->second, + *parent_property_config)) { + return absl_ports::InvalidArgumentError(absl_ports::StrCat( + "Property ", property_name, " from child type ", child_type_name, + " is not compatible to the parent type ", parent_type_name, ".")); + } + } + } + } + return libtextclassifier3::Status::OK; +} + void SchemaUtil::BuildTypeConfigMap( const SchemaProto& schema, SchemaUtil::TypeConfigMap* type_config_map) { type_config_map->clear(); diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h index 825625e..445affd 100644 --- a/icing/schema/schema-util.h +++ b/icing/schema/schema-util.h @@ -33,6 +33,14 @@ class SchemaUtil { using TypeConfigMap = std::unordered_map<std::string, const SchemaTypeConfigProto>; + // A data structure that stores the relationships between schema types. The + // keys in TypeRelationMap are schema types, and the values are sets of schema + // types that are directly or indirectly related to the key. 
+ template <typename T> + using TypeRelationMap = + std::unordered_map<std::string_view, + std::unordered_map<std::string_view, T>>; + // If A -> B is indicated in the map, then type A must be built before // building type B, which implies one of the following situations. // @@ -48,10 +56,16 @@ class SchemaUtil { // C -> B with valid PropertyConfigProto* respectively in this map, but we // will also expand transitive dependents: add A -> B into dependent map with // empty vector of "edges". - using DependentMap = std::unordered_map< - std::string_view, - std::unordered_map<std::string_view, - std::vector<const PropertyConfigProto*>>>; + using DependentMap = TypeRelationMap<std::vector<const PropertyConfigProto*>>; + + // If A -> B is indicated in the map, then type A is a parent type of B, + // directly or indirectly. If directly, the bool value in the map will be + // true, otherwise false. + // + // Note that all relationships contained in this map are also entries in the + // DependentMap, i.e. if B inherits from A, then there will be a mapping from + // A to B in both this map and the DependentMap. + using InheritanceMap = TypeRelationMap<bool>; struct SchemaDelta { // Which schema types were present in the old schema, but were deleted from @@ -124,18 +138,25 @@ class SchemaUtil { // SchemaTypeConfigProto.schema_type // 10. Property names can only be alphanumeric. // 11. Any STRING data types have a valid string_indexing_config - // 12. A SchemaTypeConfigProto cannot have a property whose schema_type is - // itself, thus creating an infinite loop. - // 13. Two SchemaTypeConfigProtos cannot have properties that reference each - // other's schema_type, thus creating an infinite loop. - // 14. PropertyConfigProtos.joinable_config must be valid. See + // 12. PropertyConfigProtos.joinable_config must be valid. See // ValidateJoinableConfig for more details. - // 15. Any PropertyConfigProtos with nested DOCUMENT data type must not have + // 13. 
Any PropertyConfigProtos with nested DOCUMENT data type must not have // REPEATED cardinality if they reference a schema type containing // joinable property. - // - // TODO(b/171996137): Clarify 12 and 13 are only for indexed properties, once - // document properties can be opted out of indexing. + // 14. The schema definition cannot have invalid cycles. A cycle is invalid + // if: + // a. SchemaTypeConfigProto.parent_type definitions form an inheritance + // cycle. + // b. The schema's property definitions have schema_types that form a + // cycle, and all properties on the cycle declare + // DocumentIndexingConfig.index_nested_properties=true. + // c. The schema's property definitions have schema_types that form a + // cycle, and the cycle leads to an invalid joinable property config. + // This is the case if: + // i. Any type node in the cycle itself has a joinable property + // (property whose joinable config is not NONE), OR + // ii. Any type node in the cycle has a nested-type (direct or + // indirect) with a joinable property. // // Returns: // On success, a dependent map from each type to its dependent types @@ -143,7 +164,28 @@ class SchemaUtil { // ALREADY_EXISTS for case 1 and 2 // INVALID_ARGUMENT for 3-14 static libtextclassifier3::StatusOr<DependentMap> Validate( - const SchemaProto& schema); + const SchemaProto& schema, bool allow_circular_schema_definitions); + + // Builds a transitive inheritance map. + // + // Ex. Suppose we have a schema with four types A, B, C and D, and we have the + // following direct inheritance relation. + // + // A -> B (A is the parent type of B) + // B -> C (B is the parent type of C) + // C -> D (C is the parent type of D) + // + // Then, the transitive inheritance map for this schema would be: + // + // A -> B, C, D + // B -> C, D + // C -> D + // + // RETURNS: + // On success, a transitive inheritance map of all types in the schema. + // INVALID_ARGUMENT if the inheritance graph contains a cycle.
+ static libtextclassifier3::StatusOr<SchemaUtil::InheritanceMap> + BuildTransitiveInheritanceGraph(const SchemaProto& schema); // Creates a mapping of schema type -> schema type config proto. The // type_config_map is cleared, and then each schema-type_config_proto pair is @@ -270,6 +312,52 @@ class SchemaUtil { PropertyConfigProto::DataType::Code data_type, PropertyConfigProto::Cardinality::Code cardinality, std::string_view schema_type, std::string_view property_name); + + // Returns if 'parent_type' is a direct or indirect parent of 'child_type'. + static bool IsParent(const SchemaUtil::InheritanceMap& inheritance_map, + std::string_view parent_type, + std::string_view child_type); + + // Returns if 'child_property_config' in a child type can override + // 'parent_property_config' in the parent type. + // + // Let's assign 'child_property_config' a type T1 and 'parent_property_config' + // a type T2 that captures information for their data_type, schema_type and + // cardinalities, so that 'child_property_config' can override + // 'parent_property_config' if and only if T1 <: T2, i.e. T1 is a subtype of + // T2. + // + // Below are the rules for inferring subtype relations. + // - T <: T for every type T. + // - If U extends T, then U <: T. + // - For every type T1, T2 and T3, if T1 <: T2 and T2 <: T3, then T1 <: T3. + // - Optional<T> <: Repeated<T> for every type T. + // - Required<T> <: Optional<T> for every type T. + // - If T1 <: T2, then + // - Required<T1> <: Required<T2> + // - Optional<T1> <: Optional<T2> + // - Repeated<T1> <: Repeated<T2> + // + // We assume the Closed World Assumption (CWA), i.e. if T1 <: T2 cannot be + // deduced from the above rules, then T1 is not a subtype of T2. 
+ static bool IsInheritedPropertyCompatible( + const SchemaUtil::InheritanceMap& inheritance_map, + const PropertyConfigProto& child_property_config, + const PropertyConfigProto& parent_property_config); + + // Verifies that every child type's property set has included all compatible + // properties from parent types, based on the following rule: + // + // - If a property "prop" of type T is in the parent, then the child type must + // also have "prop" that is of type U, such that U <: T, i.e. U is a subtype + // of T. + // + // RETURNS: + // Ok on validation success + // INVALID_ARGUMENT if an exception that violates the above validation rule + // is found. + static libtextclassifier3::Status ValidateInheritedProperties( + const SchemaProto& schema); }; } // namespace lib diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc index df7a421..3ea855c 100644 --- a/icing/schema/schema-util_test.cc +++ b/icing/schema/schema-util_test.cc @@ -14,15 +14,12 @@ #include "icing/schema/schema-util.h" -#include <cstdint> -#include <string> #include <string_view> #include <unordered_set> #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/proto/schema.pb.h" -#include "icing/proto/term.pb.h" #include "icing/schema-builder.h" #include "icing/testing/common-matchers.h" @@ -34,6 +31,8 @@ using portable_equals_proto::EqualsProto; using ::testing::Eq; using ::testing::HasSubstr; using ::testing::IsEmpty; +using ::testing::IsFalse; +using ::testing::IsTrue; using ::testing::Pair; using ::testing::Pointee; using ::testing::SizeIs; @@ -44,7 +43,9 @@ constexpr char kEmailType[] = "EmailMessage"; constexpr char kMessageType[] = "Text"; constexpr char kPersonType[] = "Person"; -TEST(SchemaUtilTest, DependentGraphAlphabeticalOrder) { +class SchemaUtilTest : public ::testing::TestWithParam<bool> {}; + +TEST_P(SchemaUtilTest, DependentGraphAlphabeticalOrder) { // Create a schema with the following dependent relation: // C // / \ @@ -121,7 +122,7 @@ 
TEST(SchemaUtilTest, DependentGraphAlphabeticalOrder) { .AddType(type_f) .Build(); ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, - SchemaUtil::Validate(schema)); + SchemaUtil::Validate(schema, GetParam())); EXPECT_THAT(d_map, testing::SizeIs(5)); EXPECT_THAT( d_map["F"], @@ -151,7 +152,7 @@ TEST(SchemaUtilTest, DependentGraphAlphabeticalOrder) { EqualsProto(type_a.properties(0))))))); } -TEST(SchemaUtilTest, DependentGraphReverseAlphabeticalOrder) { +TEST_P(SchemaUtilTest, DependentGraphReverseAlphabeticalOrder) { // Create a schema with the following dependent relation: // C // / \ @@ -229,7 +230,7 @@ TEST(SchemaUtilTest, DependentGraphReverseAlphabeticalOrder) { .AddType(type_a) .Build(); ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, - SchemaUtil::Validate(schema)); + SchemaUtil::Validate(schema, GetParam())); EXPECT_THAT(d_map, testing::SizeIs(5)); EXPECT_THAT( d_map["F"], @@ -259,7 +260,7 @@ TEST(SchemaUtilTest, DependentGraphReverseAlphabeticalOrder) { EqualsProto(type_a.properties(0))))))); } -TEST(SchemaUtilTest, DependentGraphMixedOrder) { +TEST_P(SchemaUtilTest, DependentGraphMixedOrder) { // Create a schema with the following dependent relation: // C // / \ @@ -336,7 +337,7 @@ TEST(SchemaUtilTest, DependentGraphMixedOrder) { .AddType(type_d) .Build(); ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, - SchemaUtil::Validate(schema)); + SchemaUtil::Validate(schema, GetParam())); EXPECT_THAT(d_map, testing::SizeIs(5)); EXPECT_THAT( d_map["F"], @@ -366,9 +367,9 @@ TEST(SchemaUtilTest, DependentGraphMixedOrder) { EqualsProto(type_a.properties(0))))))); } -TEST(SchemaUtilTest, TopLevelCycle) { - // Create a schema with the following dependent relation: - // A - B - B - B - B.... +TEST_P(SchemaUtilTest, TopLevelCycleIndexableTrueInvalid) { + // Create a schema with the following nested-type relation: + // A - B - B - B - B.... 
where all edges declare index_nested_properties=true SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder() .SetType("A") @@ -389,14 +390,57 @@ TEST(SchemaUtilTest, TopLevelCycle) { .Build(); SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, - HasSubstr("Infinite loop"))); + HasSubstr("Invalid cycle"))); } -TEST(SchemaUtilTest, MultiLevelCycle) { +TEST_P(SchemaUtilTest, TopLevelCycleIndexableFalseNotJoinableOK) { + if (GetParam() != true) { + GTEST_SKIP() << "This is an invalid cycle if circular schema definitions " + "are not allowed."; + } + + // Create a schema with the following nested-type relation and + // index_nested_properties definition: + // A -(true)-> B -(false)-> B -(false)-> B.... + // Edge B -(false)-> B breaks the invalid cycle, so this is allowed. + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .Build(); + + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + // Assert Validate status is OK and check dependent map + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema, GetParam())); + EXPECT_THAT(d_map, SizeIs(1)); + EXPECT_THAT(d_map["B"], + UnorderedElementsAre( + Pair("A", UnorderedElementsAre( + Pointee(EqualsProto(type_a.properties(0))))), + Pair("B", UnorderedElementsAre( + Pointee(EqualsProto(type_b.properties(0))))))); +} + +TEST_P(SchemaUtilTest, 
MultiLevelCycleIndexableTrueInvalid) { // Create a schema with the following dependent relation: // A - B - C - A - B - C - A ... + // where all edges declare index_nested_properties=true SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder() .SetType("A") @@ -427,11 +471,1222 @@ TEST(SchemaUtilTest, MultiLevelCycle) { SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), + StatusIs((libtextclassifier3::StatusCode::INVALID_ARGUMENT), + HasSubstr("Invalid cycle"))); +} + +TEST_P(SchemaUtilTest, MultiLevelCycleIndexableFalseNotJoinableOK) { + if (GetParam() != true) { + GTEST_SKIP() << "This is an invalid cycle if circular schema definitions " + "are not allowed."; + } + + // Create a schema with the following nested-type relation: + // A -(true)-> B -(false)-> C -(true)-> A -(true)-> B -(false)-> C ... + // B -(false)-> C breaking the infinite cycle. 
+ SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::OK)); +} + +TEST_P(SchemaUtilTest, MultiLevelCycleDependentMapOk) { + if (GetParam() != true) { + GTEST_SKIP() << "This is an invalid cycle if circular schema definitions " + "are not allowed."; + } + + // Create a schema with the following nested-type dependent relation: + // A -(false)-> B -(false)-> C -(false)-> A --> B --> C ... + // i.e. 
A is a property of B + // B is a property of C + // C is a property of A + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + // Assert Validate status is OK and check dependent map + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema, GetParam())); + EXPECT_THAT(d_map, SizeIs(3)); + EXPECT_THAT( + d_map["A"], + UnorderedElementsAre(Pair("A", IsEmpty()), + Pair("B", UnorderedElementsAre(Pointee( + EqualsProto(type_b.properties(0))))), + Pair("C", IsEmpty()))); + EXPECT_THAT( + d_map["B"], + UnorderedElementsAre(Pair("A", IsEmpty()), Pair("B", IsEmpty()), + Pair("C", UnorderedElementsAre(Pointee( + EqualsProto(type_c.properties(0))))))); + EXPECT_THAT( + d_map["C"], + UnorderedElementsAre(Pair("A", UnorderedElementsAre(Pointee( + EqualsProto(type_a.properties(0))))), + Pair("B", IsEmpty()), Pair("C", IsEmpty()))); +} + +TEST_P(SchemaUtilTest, NestedCycleIndexableTrueInvalid) { + // Create a schema with the following dependent relation: + // A -(false)-> B <-(true)-> C -(false)-> D. + // B <-(true)-> C creates an invalid cycle. 
+ SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("prop") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)) + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + .AddType(type_b) + .AddType(type_c) + .AddType(type_d) + .Build(); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Invalid cycle"))); +} + +TEST_P(SchemaUtilTest, NestedCycleIndexableFalseNotJoinableOK) { + if (GetParam() != true) { + GTEST_SKIP() << "This is an invalid cycle if circular schema definitions " + "are not allowed."; + } + + // Create a schema with the following nested-type relation: + // A -(true)-> B -(true)-> C -(false)-> B -(true)-> D. + // C -(false)-> B breaks the invalid cycle in B - C - B. 
+ SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("prop") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)) + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + .AddType(type_b) + .AddType(type_c) + .AddType(type_d) + .Build(); + // Assert Validate status is OK and check dependent map + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema, GetParam())); + EXPECT_THAT(d_map, SizeIs(3)); + EXPECT_THAT(d_map["B"], + UnorderedElementsAre( + Pair("A", UnorderedElementsAre( + Pointee(EqualsProto(type_a.properties(0))))), + Pair("B", IsEmpty()), + Pair("C", UnorderedElementsAre( + Pointee(EqualsProto(type_c.properties(0))))))); + EXPECT_THAT( + d_map["C"], + UnorderedElementsAre(Pair("A", IsEmpty()), + Pair("B", 
UnorderedElementsAre(Pointee( + EqualsProto(type_b.properties(0))))), + Pair("C", IsEmpty()))); + EXPECT_THAT(d_map["D"], + UnorderedElementsAre( + Pair("A", IsEmpty()), + Pair("B", UnorderedElementsAre( + Pointee(EqualsProto(type_b.properties(1))))), + Pair("C", UnorderedElementsAre( + Pointee(EqualsProto(type_c.properties(1))))))); +} + +TEST_P(SchemaUtilTest, MultiplePathsAnyPathContainsCycleIsInvalid) { + // Create a schema with the following nested-type relation: + // C -(false)-> B -(true)-> A + // ^ / + // (true)\ /(true) + // \ v + // D + // There is a cycle in B-A-D-B... so this is not allowed + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + .AddType(type_d) + .AddType(type_c) + .AddType(type_b) + .Build(); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Invalid cycle"))); +} + +TEST_P(SchemaUtilTest, MultipleCycles_anyCycleIndexableTrueInvalid) { + // Create a schema with the 
following nested-type dependent relation: + // Note that the arrows in this graph show the direction of the dependent + // relation, rather than nested-type relations. + // A -(F)-> B + // ^ \ | + // (T)| (T)\ |(T) + // | v v + // D <-(T)- C + // There are two cycles: A-B-C-D and A-C-D. The first cycle is allowed because + // A-B has nested-indexable=false, but A-C-D is not allowed because all of + // its edges declare index_nested_properties=true. + // + // Schema nested-type property relation graph: + // A <-- B + // | ^ ^ + // v \ | + // D --> C + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(type_d) + .AddType(type_c) + .AddType(type_b) + .AddType(type_a) + .Build(); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, NonExistentType) { +TEST_P(SchemaUtilTest, CycleWithSameTypedProps_allPropsIndexableFalseIsOK)
{ + if (GetParam() != true) { + GTEST_SKIP() << "This is an invalid cycle if circular schema definitions " + "are not allowed."; + } + + // Create a schema with the following nested-type relation and + // index_nested_properties definition: + // A <-(true)- B <-(false)- A -(false)-> B -(true)-> A + // A has 2 properties with type B. A - B breaks the invalid cycle only when + // both properties declare index_nested_properties=false. + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b1") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("b2") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("A") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + // Assert Validate status is OK and check dependent map + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema, GetParam())); + EXPECT_THAT(d_map, SizeIs(2)); + EXPECT_THAT( + d_map["A"], + UnorderedElementsAre(Pair("A", IsEmpty()), + Pair("B", UnorderedElementsAre(Pointee( + EqualsProto(type_b.properties(0))))))); + EXPECT_THAT(d_map["B"], + UnorderedElementsAre( + Pair("A", UnorderedElementsAre( + Pointee(EqualsProto(type_a.properties(0))), + Pointee(EqualsProto(type_a.properties(1))))), + Pair("B", IsEmpty()))); +} + +TEST_P(SchemaUtilTest, CycleWithSameTypedProps_anyPropIndexableTrueIsInvalid) { + // Create a schema with the following nested-type relation and + // index_nested_properties definition: + // A <-(true)- B <-(true)- A -(false)-> B -(true)-> A + // A 
has 2 properties with type B. Prop 'b1' declares + // index_nested_properties=true, so there is an invalid cycle. + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b1") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("b2") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("A") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Invalid cycle"))); +} + +TEST_P(SchemaUtilTest, CycleWithJoinablePropertyNotAllowed) { + // Create a schema with the following dependent relation: + // A + // / ^ + // v \ + // (joinable) B ---> C + // B also has a string property that is joinable on QUALIFIED_ID + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("joinableProp") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaTypeConfigProto type_c
= + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/false)) + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Invalid cycle"))); +} + +TEST_P(SchemaUtilTest, NonNestedJoinablePropOutsideCycleOK) { + if (GetParam() != true) { + GTEST_SKIP() << "This is an invalid cycle if circular schema definitions " + "are not allowed."; + } + + // Create a schema with the following dependent relation: + // A -(false)-> B <-(false)-> C... + // A has a string property that is joinable on QUALIFIED_ID, but the cycle is + // B-C-B, and none of B or C depends on A, so this is fine. + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("joinableProp") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + // Assert Validate 
status is OK and check dependent map + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema, GetParam())); + EXPECT_THAT(d_map, SizeIs(2)); + EXPECT_THAT(d_map["B"], + UnorderedElementsAre( + Pair("A", UnorderedElementsAre( + Pointee(EqualsProto(type_a.properties(0))))), + Pair("B", IsEmpty()), + Pair("C", UnorderedElementsAre( + Pointee(EqualsProto(type_c.properties(0))))))); + EXPECT_THAT( + d_map["C"], + UnorderedElementsAre(Pair("A", IsEmpty()), + Pair("B", UnorderedElementsAre(Pointee( + EqualsProto(type_b.properties(0))))), + Pair("C", IsEmpty()))); +} + +TEST_P(SchemaUtilTest, DirectNestedJoinablePropOutsideCycleNotAllowed) { + // Create a schema with the following dependent relation: + // A + // / ^ + // v \ + // B ---> C ---> D(joinable) + // All edges have index_nested_properties=false and only D has a joinable + // property. The cycle A-B-C... is not allowed since there is a type in the + // cycle (C) which has a direct nested-type (D) with a joinable property. 
+ SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("joinableProp") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + .AddType(type_b) + .AddType(type_c) + .AddType(type_d) + .Build(); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Invalid cycle"))); +} + +TEST_P(SchemaUtilTest, TransitiveNestedJoinablePropOutsideCycleNotAllowed) { + // Create a schema with the following dependent relation: + // A + // / ^ + // v \ + // B ---> C ---> D ---> E (joinable) + // All edges have index_nested_properties=false and only E has a joinable + // property. The cycle A-B-C... is not allowed since there is a type in the + // cycle (C) which has a transitive nested-type (E) with a joinable property.
+ SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder() + .SetType("E") + .AddProperty( + PropertyConfigBuilder() + .SetName("joinableProp") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + .AddType(type_b) + .AddType(type_c) + .AddType(type_d) + .AddType(type_e) + .Build(); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Invalid cycle"))); +} + +TEST_P(SchemaUtilTest, + NestedJoinablePropOutsideCycleNotAllowed_reverseIterationOrder) { + // Create a schema with the following dependent relation: + // E + // / ^ + // v \ + // D ---> C ---> B ---> A (joinable) + // All 
edges have index_nested_properties=false and only A has a joinable + // property. The cycle E-D-C... is not allowed since there is a type in the + // cycle (C) which has a transitive nested-type (A) with a joinable property. + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("joinableProp") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder() + .SetType("E") + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/false)) + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + .AddType(type_b) + .AddType(type_c) + .AddType(type_d) + .AddType(type_e) + .Build(); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Invalid cycle"))); +} + 
+TEST_P(SchemaUtilTest, ComplexCycleWithJoinablePropertyNotAllowed) { + // Create a schema with the following dependent relation: + // A + // / ^ + // v \ + // B ---> E + // / \ ^ + // v v \ + // C D --> F + // + // Cycles: A-B-E-A, A-B-D-F-E-A. + // All edges have index_nested_properties=false, but D has a joinable property + // so the second cycle is not allowed. + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("joinableProp") + .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("f") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("F", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("joinableProp") + .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder() + .SetType("E") + .AddProperty( + PropertyConfigBuilder() 
+ .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_f = + SchemaTypeConfigBuilder() + .SetType("F") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/false)) + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + .AddType(type_b) + .AddType(type_c) + .AddType(type_d) + .AddType(type_e) + .AddType(type_f) + .Build(); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Invalid cycle"))); +} + +TEST_P(SchemaUtilTest, ComplexCycleWithIndexableTrueNotAllowed) { + // Create a schema with the following dependent relation: + // A + // / ^ + // v \ + // B ---> E + // / \ ^ + // v v \ + // C D --> F + // + // Cycles: A-B-E-A, A-B-D-F-E-A. + // B->E has index_nested_properties=false, so the first cycle is allowed. 
+ // All edges on the second cycle are nested_indexable, so the second cycle is + // not allowed + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/false)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("joinableProp") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("f") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("F", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder() + .SetType("E") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_f = + SchemaTypeConfigBuilder() + .SetType("F") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + 
.AddType(type_b) + .AddType(type_c) + .AddType(type_d) + .AddType(type_e) + .AddType(type_f) + .Build(); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Invalid cycle"))); +} + +TEST_P(SchemaUtilTest, InheritanceAndNestedTypeRelations_noCycle) { + if (GetParam() != true) { + GTEST_SKIP() << "This is an invalid cycle if circular schema definitions " + "are not allowed."; + } + + // Create a schema with the following relations: + // index_nested_properties definition: + // 1. Nested-type relations: + // A -(true)-> B -(true)-> C + // (false)| (false)/ \(false) + // B B C + // The properties in the second row are required for B and C to be + // compatible with their parents. index_nested_properties must be false in + // these properties so that no invalid cycle can be formed because of these + // self reference. + // + // 2. Inheritance relations: + // C -> B -> A (A is a parent of B, which is a parent of C) + // + // These two relations are separate and do not affect each other. In this + // case there is no cycle. 
+ SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddParentType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddParentType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("prop") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)) + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema, GetParam())); + EXPECT_THAT(d_map, SizeIs(3)); + // Both A-B and A-C are inheritance relations. + EXPECT_THAT(d_map["A"], + UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()))); + // B-A and B-B are nested-type relations, B-C is both a nested-type and an + // inheritance relation. 
+ EXPECT_THAT(d_map["B"], + UnorderedElementsAre( + Pair("A", UnorderedElementsAre( + Pointee(EqualsProto(type_a.properties(0))))), + Pair("B", UnorderedElementsAre( + Pointee(EqualsProto(type_b.properties(0))))), + Pair("C", UnorderedElementsAre( + Pointee(EqualsProto(type_c.properties(0))))))); + // C-C, C-B and C-A are all nested-type relations. + EXPECT_THAT(d_map["C"], + UnorderedElementsAre( + Pair("B", UnorderedElementsAre( + Pointee(EqualsProto(type_b.properties(1))))), + Pair("C", UnorderedElementsAre( + Pointee(EqualsProto(type_c.properties(1))))), + Pair("A", IsEmpty()))); + + ICING_ASSERT_OK_AND_ASSIGN( + SchemaUtil::InheritanceMap i_map, + SchemaUtil::BuildTransitiveInheritanceGraph(schema)); + EXPECT_THAT(i_map, SizeIs(2)); + EXPECT_THAT(i_map["A"], + UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsFalse()))); + EXPECT_THAT(i_map["B"], UnorderedElementsAre(Pair("C", IsTrue()))); +} + +TEST_P(SchemaUtilTest, InheritanceAndNestedTypeRelations_nestedTypeCycle) { + // Create a schema with the following relations: + // index_nested_properties definition: + // 1. Nested-type relations: + // A -(true)-> B -(true)-> C + // (true)| (false)/ \(false) + // B B C + // + // 2. 
Inheritance relations: + // C -> B -> A (A is a parent of B, which is a parent of C) + // + // These two relations are separate and do not affect each other, but there is + // a cycle in nested-type relations: B - B + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddParentType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddParentType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("prop") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)) + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Invalid cycle"))); +} + +TEST_P(SchemaUtilTest, InheritanceAndNestedTypeRelations_inheritanceCycle) { + // Create a schema with the following relations: + // index_nested_properties definition: + // 1. 
Nested-type relations: + // A -(true)-> B -(true)-> C + // (false)| (false)/ \(false) + // B B C + // + // 2. Inheritance relations: + // C -> B -> A -> B (A is a parent of B, which is a parent of C and A) + // + // These two relations are separate and do not affect each other, but there is + // a cycle in inheritance relation: B - A - B + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddParentType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddParentType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddParentType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/false)) + .AddProperty( + PropertyConfigBuilder() + .SetName("prop") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_NONE)) + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("inherits from itself"))); +} + +TEST_P(SchemaUtilTest, NonExistentType) { // Create a schema with the following dependent 
relation: // A - B - C - X (does not exist) SchemaTypeConfigProto type_a = @@ -464,25 +1719,121 @@ TEST(SchemaUtilTest, NonExistentType) { SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, SimpleInheritance) { +TEST_P(SchemaUtilTest, SingleTypeIsBothDirectAndIndirectDependent) { + // Create a schema with the following dependent relation, all of which are via + // nested document. In this case, C is both a direct dependent and an indirect + // dependent of A. + // A + // | \ + // | B + // | / + // C + SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema, GetParam())); + EXPECT_THAT(d_map, SizeIs(2)); + EXPECT_THAT(d_map["A"], + UnorderedElementsAre( + Pair("B", UnorderedElementsAre( + Pointee(EqualsProto(type_b.properties(0))))), + Pair("C", UnorderedElementsAre( + Pointee(EqualsProto(type_c.properties(0))))))); + EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair( + "C", 
UnorderedElementsAre(Pointee( + EqualsProto(type_c.properties(1))))))); + + ICING_ASSERT_OK_AND_ASSIGN( + SchemaUtil::InheritanceMap i_map, + SchemaUtil::BuildTransitiveInheritanceGraph(schema)); + EXPECT_THAT(i_map, IsEmpty()); +} + +TEST_P(SchemaUtilTest, SimpleInheritance) { // Create a schema with the following inheritance relation: // A <- B SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build(); SchemaTypeConfigProto type_b = - SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build(); SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, - SchemaUtil::Validate(schema)); + SchemaUtil::Validate(schema, GetParam())); EXPECT_THAT(d_map, SizeIs(1)); EXPECT_THAT(d_map["A"], UnorderedElementsAre(Pair("B", IsEmpty()))); + + ICING_ASSERT_OK_AND_ASSIGN( + SchemaUtil::InheritanceMap i_map, + SchemaUtil::BuildTransitiveInheritanceGraph(schema)); + EXPECT_THAT(i_map, SizeIs(1)); + EXPECT_THAT(i_map["A"], UnorderedElementsAre(Pair("B", IsTrue()))); +} + +TEST_P(SchemaUtilTest, SingleInheritanceTypeIsBothDirectAndIndirectChild) { + // Create a schema with the following inheritance relation. In this case, C is + // both a direct and an indirect child of A. 
+ // A + // | \ + // | B + // | / + // C + SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build(); + SchemaTypeConfigProto type_c = SchemaTypeConfigBuilder() + .SetType("C") + .AddParentType("A") + .AddParentType("B") + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema, GetParam())); + EXPECT_THAT(d_map, SizeIs(2)); + EXPECT_THAT(d_map["A"], + UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()))); + EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair("C", IsEmpty()))); + + ICING_ASSERT_OK_AND_ASSIGN( + SchemaUtil::InheritanceMap i_map, + SchemaUtil::BuildTransitiveInheritanceGraph(schema)); + EXPECT_THAT(i_map, SizeIs(2)); + EXPECT_THAT(i_map["A"], + UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsTrue()))); + EXPECT_THAT(i_map["B"], UnorderedElementsAre(Pair("C", IsTrue()))); } -TEST(SchemaUtilTest, ComplexInheritance) { +TEST_P(SchemaUtilTest, ComplexInheritance) { // Create a schema with the following inheritance relation: // A // / \ @@ -493,15 +1844,15 @@ TEST(SchemaUtilTest, ComplexInheritance) { // F SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build(); SchemaTypeConfigProto type_b = - SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build(); SchemaTypeConfigProto type_c = - SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build(); + SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build(); SchemaTypeConfigProto type_d = - SchemaTypeConfigBuilder().SetType("D").SetParentType("B").Build(); + SchemaTypeConfigBuilder().SetType("D").AddParentType("B").Build(); SchemaTypeConfigProto type_e = - 
SchemaTypeConfigBuilder().SetType("E").SetParentType("A").Build(); + SchemaTypeConfigBuilder().SetType("E").AddParentType("A").Build(); SchemaTypeConfigProto type_f = - SchemaTypeConfigBuilder().SetType("F").SetParentType("D").Build(); + SchemaTypeConfigBuilder().SetType("F").AddParentType("D").Build(); SchemaProto schema = SchemaBuilder() .AddType(type_a) @@ -512,7 +1863,7 @@ TEST(SchemaUtilTest, ComplexInheritance) { .AddType(type_f) .Build(); ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, - SchemaUtil::Validate(schema)); + SchemaUtil::Validate(schema, GetParam())); EXPECT_THAT(d_map, SizeIs(3)); EXPECT_THAT(d_map["A"], UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()), @@ -522,56 +1873,69 @@ TEST(SchemaUtilTest, ComplexInheritance) { UnorderedElementsAre(Pair("C", IsEmpty()), Pair("D", IsEmpty()), Pair("F", IsEmpty()))); EXPECT_THAT(d_map["D"], UnorderedElementsAre(Pair("F", IsEmpty()))); + + ICING_ASSERT_OK_AND_ASSIGN( + SchemaUtil::InheritanceMap i_map, + SchemaUtil::BuildTransitiveInheritanceGraph(schema)); + EXPECT_THAT(i_map, SizeIs(3)); + EXPECT_THAT(i_map["A"], + UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsFalse()), + Pair("D", IsFalse()), Pair("E", IsTrue()), + Pair("F", IsFalse()))); + EXPECT_THAT(i_map["B"], + UnorderedElementsAre(Pair("C", IsTrue()), Pair("D", IsTrue()), + Pair("F", IsFalse()))); + EXPECT_THAT(i_map["D"], UnorderedElementsAre(Pair("F", IsTrue()))); } -TEST(SchemaUtilTest, InheritanceCycle) { +TEST_P(SchemaUtilTest, InheritanceCycle) { // Create a schema with the following inheritance relation: // C <- A <- B <- C SchemaTypeConfigProto type_a = - SchemaTypeConfigBuilder().SetType("A").SetParentType("C").Build(); + SchemaTypeConfigBuilder().SetType("A").AddParentType("C").Build(); SchemaTypeConfigProto type_b = - SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build(); SchemaTypeConfigProto type_c = - 
SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build(); + SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build(); SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, SelfInheritance) { +TEST_P(SchemaUtilTest, SelfInheritance) { SchemaTypeConfigProto type_a = - SchemaTypeConfigBuilder().SetType("A").SetParentType("A").Build(); + SchemaTypeConfigBuilder().SetType("A").AddParentType("A").Build(); SchemaProto schema = SchemaBuilder().AddType(type_a).Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, NonExistentParentType) { +TEST_P(SchemaUtilTest, NonExistentParentType) { // Create a schema with the following inheritance relation: // (does not exist) X <- A <- B <- C SchemaTypeConfigProto type_a = - SchemaTypeConfigBuilder().SetType("A").SetParentType("X").Build(); + SchemaTypeConfigBuilder().SetType("A").AddParentType("X").Build(); SchemaTypeConfigProto type_b = - SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build(); SchemaTypeConfigProto type_c = - SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build(); + SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build(); SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, SimpleInheritanceWithNestedType) { +TEST_P(SchemaUtilTest, SimpleInheritanceWithNestedType) { // Create a schema with the following dependent 
relation: // A - B (via inheritance) // B - C (via nested document) SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build(); SchemaTypeConfigProto type_b = - SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build(); SchemaTypeConfigProto type_c = SchemaTypeConfigBuilder() .SetType("C") @@ -585,16 +1949,22 @@ TEST(SchemaUtilTest, SimpleInheritanceWithNestedType) { SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, - SchemaUtil::Validate(schema)); + SchemaUtil::Validate(schema, GetParam())); EXPECT_THAT(d_map, SizeIs(2)); - EXPECT_THAT(d_map["A"], - UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()))); + // Nested-type dependency and inheritance dependencies are not transitive. + EXPECT_THAT(d_map["A"], UnorderedElementsAre(Pair("B", IsEmpty()))); EXPECT_THAT(d_map["B"], UnorderedElementsAre(Pair( "C", UnorderedElementsAre(Pointee( EqualsProto(type_c.properties(0))))))); + + ICING_ASSERT_OK_AND_ASSIGN( + SchemaUtil::InheritanceMap i_map, + SchemaUtil::BuildTransitiveInheritanceGraph(schema)); + EXPECT_THAT(i_map, SizeIs(1)); + EXPECT_THAT(i_map["A"], UnorderedElementsAre(Pair("B", IsTrue()))); } -TEST(SchemaUtilTest, ComplexInheritanceWithNestedType) { +TEST_P(SchemaUtilTest, ComplexInheritanceWithNestedType) { // Create a schema with the following dependent relation: // A // / \ @@ -611,9 +1981,9 @@ TEST(SchemaUtilTest, ComplexInheritanceWithNestedType) { // F has a nested document of type D SchemaTypeConfigProto type_a = SchemaTypeConfigBuilder().SetType("A").Build(); SchemaTypeConfigProto type_b = - SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build(); SchemaTypeConfigProto type_c = - SchemaTypeConfigBuilder().SetType("C").SetParentType("B").Build(); + 
SchemaTypeConfigBuilder().SetType("C").AddParentType("B").Build(); SchemaTypeConfigProto type_d = SchemaTypeConfigBuilder() .SetType("D") @@ -651,15 +2021,13 @@ TEST(SchemaUtilTest, ComplexInheritanceWithNestedType) { .AddType(type_f) .Build(); ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, - SchemaUtil::Validate(schema)); + SchemaUtil::Validate(schema, GetParam())); EXPECT_THAT(d_map, SizeIs(3)); EXPECT_THAT( d_map["A"], - UnorderedElementsAre( - Pair("B", IsEmpty()), Pair("C", IsEmpty()), Pair("D", IsEmpty()), - Pair("E", UnorderedElementsAre( - Pointee(EqualsProto(type_e.properties(0))))), - Pair("F", IsEmpty()))); + UnorderedElementsAre(Pair("B", IsEmpty()), Pair("C", IsEmpty()), + Pair("E", UnorderedElementsAre(Pointee( + EqualsProto(type_e.properties(0))))))); EXPECT_THAT( d_map["B"], UnorderedElementsAre(Pair("C", IsEmpty()), @@ -669,9 +2037,17 @@ TEST(SchemaUtilTest, ComplexInheritanceWithNestedType) { EXPECT_THAT(d_map["D"], UnorderedElementsAre(Pair( "F", UnorderedElementsAre(Pointee( EqualsProto(type_f.properties(0))))))); + + ICING_ASSERT_OK_AND_ASSIGN( + SchemaUtil::InheritanceMap i_map, + SchemaUtil::BuildTransitiveInheritanceGraph(schema)); + EXPECT_THAT(i_map, SizeIs(2)); + EXPECT_THAT(i_map["A"], + UnorderedElementsAre(Pair("B", IsTrue()), Pair("C", IsFalse()))); + EXPECT_THAT(i_map["B"], UnorderedElementsAre(Pair("C", IsTrue()))); } -TEST(SchemaUtilTest, InheritanceWithNestedTypeCycle) { +TEST_P(SchemaUtilTest, InheritanceWithNestedTypeCycle) { // Create a schema that A and B depend on each other, in the sense that B // extends A but A has a nested document of type B. 
SchemaTypeConfigProto type_a = @@ -684,19 +2060,19 @@ TEST(SchemaUtilTest, InheritanceWithNestedTypeCycle) { .SetDataTypeDocument("B", /*index_nested_properties=*/true)) .Build(); SchemaTypeConfigProto type_b = - SchemaTypeConfigBuilder().SetType("B").SetParentType("A").Build(); + SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build(); SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, EmptySchemaProtoIsValid) { +TEST_P(SchemaUtilTest, EmptySchemaProtoIsValid) { SchemaProto schema; - ICING_ASSERT_OK(SchemaUtil::Validate(schema)); + ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam())); } -TEST(SchemaUtilTest, Valid_Nested) { +TEST_P(SchemaUtilTest, Valid_Nested) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -719,43 +2095,43 @@ TEST(SchemaUtilTest, Valid_Nested) { .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - ICING_ASSERT_OK(SchemaUtil::Validate(schema)); + ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam())); } -TEST(SchemaUtilTest, ClearedPropertyConfigsIsValid) { +TEST_P(SchemaUtilTest, ClearedPropertyConfigsIsValid) { // No property fields is technically ok, but probably not realistic. 
SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType(kEmailType)) .Build(); - ICING_ASSERT_OK(SchemaUtil::Validate(schema)); + ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam())); } -TEST(SchemaUtilTest, ClearedSchemaTypeIsInvalid) { +TEST_P(SchemaUtilTest, ClearedSchemaTypeIsInvalid) { SchemaProto schema = SchemaBuilder().AddType(SchemaTypeConfigBuilder()).Build(); - ASSERT_THAT(SchemaUtil::Validate(schema), + ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, EmptySchemaTypeIsInvalid) { +TEST_P(SchemaUtilTest, EmptySchemaTypeIsInvalid) { SchemaProto schema = SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("")).Build(); - ASSERT_THAT(SchemaUtil::Validate(schema), + ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, AnySchemaTypeOk) { +TEST_P(SchemaUtilTest, AnySchemaTypeOk) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType( "abc123!@#$%^&*()_-+=[{]}|\\;:'\",<.>?你好")) .Build(); - ICING_ASSERT_OK(SchemaUtil::Validate(schema)); + ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam())); } -TEST(SchemaUtilTest, ClearedPropertyNameIsInvalid) { +TEST_P(SchemaUtilTest, ClearedPropertyNameIsInvalid) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -766,11 +2142,11 @@ TEST(SchemaUtilTest, ClearedPropertyNameIsInvalid) { .SetCardinality(CARDINALITY_REQUIRED))) .Build(); schema.mutable_types(0)->mutable_properties(0)->clear_property_name(); - ASSERT_THAT(SchemaUtil::Validate(schema), + ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, EmptyPropertyNameIsInvalid) { +TEST_P(SchemaUtilTest, EmptyPropertyNameIsInvalid) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -781,11 +2157,11 @@ 
TEST(SchemaUtilTest, EmptyPropertyNameIsInvalid) { .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - ASSERT_THAT(SchemaUtil::Validate(schema), + ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) { +TEST_P(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -796,11 +2172,11 @@ TEST(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) { .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - ASSERT_THAT(SchemaUtil::Validate(schema), + ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, AlphanumericPropertyNameOk) { +TEST_P(SchemaUtilTest, AlphanumericPropertyNameOk) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -811,10 +2187,10 @@ TEST(SchemaUtilTest, AlphanumericPropertyNameOk) { .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - ICING_ASSERT_OK(SchemaUtil::Validate(schema)); + ICING_ASSERT_OK(SchemaUtil::Validate(schema, GetParam())); } -TEST(SchemaUtilTest, DuplicatePropertyNameIsInvalid) { +TEST_P(SchemaUtilTest, DuplicatePropertyNameIsInvalid) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -828,11 +2204,11 @@ TEST(SchemaUtilTest, DuplicatePropertyNameIsInvalid) { .SetDataType(TYPE_STRING) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - ASSERT_THAT(SchemaUtil::Validate(schema), + ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::ALREADY_EXISTS)); } -TEST(SchemaUtilTest, ClearedDataTypeIsInvalid) { +TEST_P(SchemaUtilTest, ClearedDataTypeIsInvalid) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -843,11 +2219,11 @@ TEST(SchemaUtilTest, ClearedDataTypeIsInvalid) { .SetCardinality(CARDINALITY_REQUIRED))) .Build(); 
schema.mutable_types(0)->mutable_properties(0)->clear_data_type(); - ASSERT_THAT(SchemaUtil::Validate(schema), + ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, UnknownDataTypeIsInvalid) { +TEST_P(SchemaUtilTest, UnknownDataTypeIsInvalid) { SchemaProto schema = SchemaBuilder() .AddType( @@ -859,11 +2235,11 @@ TEST(SchemaUtilTest, UnknownDataTypeIsInvalid) { .SetDataType(PropertyConfigProto::DataType::UNKNOWN) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - ASSERT_THAT(SchemaUtil::Validate(schema), + ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, ClearedCardinalityIsInvalid) { +TEST_P(SchemaUtilTest, ClearedCardinalityIsInvalid) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -874,11 +2250,11 @@ TEST(SchemaUtilTest, ClearedCardinalityIsInvalid) { .SetCardinality(CARDINALITY_REQUIRED))) .Build(); schema.mutable_types(0)->mutable_properties(0)->clear_cardinality(); - ASSERT_THAT(SchemaUtil::Validate(schema), + ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, UnknownCardinalityIsInvalid) { +TEST_P(SchemaUtilTest, UnknownCardinalityIsInvalid) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -888,11 +2264,11 @@ TEST(SchemaUtilTest, UnknownCardinalityIsInvalid) { .SetDataType(TYPE_STRING) .SetCardinality(CARDINALITY_UNKNOWN))) .Build(); - ASSERT_THAT(SchemaUtil::Validate(schema), + ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) { +TEST_P(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -902,11 +2278,11 @@ TEST(SchemaUtilTest, 
ClearedPropertySchemaTypeIsInvalid) { .SetDataType(TYPE_DOCUMENT) .SetCardinality(CARDINALITY_REPEATED))) .Build(); - ASSERT_THAT(SchemaUtil::Validate(schema), + ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, Invalid_EmptyPropertySchemaType) { +TEST_P(SchemaUtilTest, Invalid_EmptyPropertySchemaType) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -919,11 +2295,11 @@ TEST(SchemaUtilTest, Invalid_EmptyPropertySchemaType) { .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - ASSERT_THAT(SchemaUtil::Validate(schema), + ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) { +TEST_P(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -936,12 +2312,12 @@ TEST(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) { .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - ASSERT_THAT(SchemaUtil::Validate(schema), + ASSERT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, HasSubstr("Undefined 'schema_type'"))); } -TEST(SchemaUtilTest, NewOptionalPropertyIsCompatible) { +TEST_P(SchemaUtilTest, NewOptionalPropertyIsCompatible) { // Configure old schema SchemaProto old_schema = SchemaBuilder() @@ -977,7 +2353,7 @@ TEST(SchemaUtilTest, NewOptionalPropertyIsCompatible) { Eq(schema_delta)); } -TEST(SchemaUtilTest, NewRequiredPropertyIsIncompatible) { +TEST_P(SchemaUtilTest, NewRequiredPropertyIsIncompatible) { // Configure old schema SchemaProto old_schema = SchemaBuilder() @@ -1013,7 +2389,7 @@ TEST(SchemaUtilTest, NewRequiredPropertyIsIncompatible) { Eq(schema_delta)); } -TEST(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) { +TEST_P(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) { // Configure old schema 
SchemaProto old_schema = SchemaBuilder() @@ -1049,7 +2425,7 @@ TEST(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) { Eq(schema_delta)); } -TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) { +TEST_P(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) { // Configure less restrictive schema based on cardinality SchemaProto less_restrictive_schema = SchemaBuilder() @@ -1091,7 +2467,7 @@ TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) { Eq(compatible_schema_delta)); } -TEST(SchemaUtilTest, DifferentDataTypeIsIncompatible) { +TEST_P(SchemaUtilTest, DifferentDataTypeIsIncompatible) { // Configure old schema, with an int64_t property SchemaProto old_schema = SchemaBuilder() @@ -1122,7 +2498,7 @@ TEST(SchemaUtilTest, DifferentDataTypeIsIncompatible) { Eq(schema_delta)); } -TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) { +TEST_P(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) { // Configure old schema, where Property is supposed to be a Person type SchemaProto old_schema = SchemaBuilder() @@ -1186,7 +2562,7 @@ TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) { EXPECT_THAT(actual.schema_types_deleted, testing::IsEmpty()); } -TEST(SchemaUtilTest, ChangingIndexedStringPropertiesMakesIndexIncompatible) { +TEST_P(SchemaUtilTest, ChangingIndexedStringPropertiesMakesIndexIncompatible) { // Configure old schema SchemaProto schema_with_indexed_property = SchemaBuilder() @@ -1228,7 +2604,7 @@ TEST(SchemaUtilTest, ChangingIndexedStringPropertiesMakesIndexIncompatible) { Eq(schema_delta)); } -TEST(SchemaUtilTest, AddingNewIndexedStringPropertyMakesIndexIncompatible) { +TEST_P(SchemaUtilTest, AddingNewIndexedStringPropertyMakesIndexIncompatible) { // Configure old schema SchemaProto old_schema = SchemaBuilder() @@ -1266,8 +2642,8 @@ TEST(SchemaUtilTest, AddingNewIndexedStringPropertyMakesIndexIncompatible) { Eq(schema_delta)); } -TEST(SchemaUtilTest, - AddingNewNonIndexedStringPropertyShouldRemainIndexCompatible) { 
+TEST_P(SchemaUtilTest, + AddingNewNonIndexedStringPropertyShouldRemainIndexCompatible) { // Configure old schema SchemaProto old_schema = SchemaBuilder() @@ -1304,7 +2680,7 @@ TEST(SchemaUtilTest, IsEmpty()); } -TEST(SchemaUtilTest, ChangingIndexedIntegerPropertiesMakesIndexIncompatible) { +TEST_P(SchemaUtilTest, ChangingIndexedIntegerPropertiesMakesIndexIncompatible) { // Configure old schema SchemaProto schema_with_indexed_property = SchemaBuilder() @@ -1344,7 +2720,7 @@ TEST(SchemaUtilTest, ChangingIndexedIntegerPropertiesMakesIndexIncompatible) { Eq(schema_delta)); } -TEST(SchemaUtilTest, AddingNewIndexedIntegerPropertyMakesIndexIncompatible) { +TEST_P(SchemaUtilTest, AddingNewIndexedIntegerPropertyMakesIndexIncompatible) { // Configure old schema SchemaProto old_schema = SchemaBuilder() @@ -1379,8 +2755,8 @@ TEST(SchemaUtilTest, AddingNewIndexedIntegerPropertyMakesIndexIncompatible) { Eq(schema_delta)); } -TEST(SchemaUtilTest, - AddingNewNonIndexedIntegerPropertyShouldRemainIndexCompatible) { +TEST_P(SchemaUtilTest, + AddingNewNonIndexedIntegerPropertyShouldRemainIndexCompatible) { // Configure old schema SchemaProto old_schema = SchemaBuilder() @@ -1414,7 +2790,7 @@ TEST(SchemaUtilTest, IsEmpty()); } -TEST(SchemaUtilTest, ChangingJoinablePropertiesMakesJoinIncompatible) { +TEST_P(SchemaUtilTest, ChangingJoinablePropertiesMakesJoinIncompatible) { // Configure old schema SchemaProto schema_with_joinable_property = SchemaBuilder() @@ -1456,7 +2832,7 @@ TEST(SchemaUtilTest, ChangingJoinablePropertiesMakesJoinIncompatible) { Eq(expected_schema_delta)); } -TEST(SchemaUtilTest, AddingNewJoinablePropertyMakesJoinIncompatible) { +TEST_P(SchemaUtilTest, AddingNewJoinablePropertyMakesJoinIncompatible) { // Configure old schema SchemaProto old_schema = SchemaBuilder() @@ -1494,7 +2870,7 @@ TEST(SchemaUtilTest, AddingNewJoinablePropertyMakesJoinIncompatible) { Eq(expected_schema_delta)); } -TEST(SchemaUtilTest, AddingNewNonJoinablePropertyShouldRemainJoinCompatible) { 
+TEST_P(SchemaUtilTest, AddingNewNonJoinablePropertyShouldRemainJoinCompatible) { // Configure old schema SchemaProto old_schema = SchemaBuilder() @@ -1531,7 +2907,7 @@ TEST(SchemaUtilTest, AddingNewNonJoinablePropertyShouldRemainJoinCompatible) { IsEmpty()); } -TEST(SchemaUtilTest, AddingTypeIsCompatible) { +TEST_P(SchemaUtilTest, AddingTypeIsCompatible) { // Can add a new type, existing data isn't incompatible, since none of them // are of this new schema type SchemaProto old_schema = @@ -1571,7 +2947,7 @@ TEST(SchemaUtilTest, AddingTypeIsCompatible) { Eq(schema_delta)); } -TEST(SchemaUtilTest, DeletingTypeIsNoted) { +TEST_P(SchemaUtilTest, DeletingTypeIsNoted) { // Can't remove an old type, new schema needs to at least have all the // previously defined schema otherwise the Documents of the missing schema // are invalid @@ -1612,7 +2988,7 @@ TEST(SchemaUtilTest, DeletingTypeIsNoted) { Eq(schema_delta)); } -TEST(SchemaUtilTest, DeletingPropertyAndChangingProperty) { +TEST_P(SchemaUtilTest, DeletingPropertyAndChangingProperty) { SchemaProto old_schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder() @@ -1650,7 +3026,7 @@ TEST(SchemaUtilTest, DeletingPropertyAndChangingProperty) { EXPECT_THAT(actual, Eq(schema_delta)); } -TEST(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) { +TEST_P(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) { // Make two schemas. One that sets index_nested_properties to false and one // that sets it to true. 
SchemaTypeConfigProto email_type_config = @@ -1705,7 +3081,7 @@ TEST(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) { EXPECT_THAT(actual, Eq(schema_delta)); } -TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) { +TEST_P(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty( @@ -1716,7 +3092,7 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) { .Build(); // Error if we don't set a term match type - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); // Passes once we set a term match type @@ -1727,10 +3103,10 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) { .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk()); } -TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) { +TEST_P(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty( @@ -1741,7 +3117,7 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) { .Build(); // Error if we don't set a tokenizer type - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); // Passes once we set a tokenizer type @@ -1752,11 +3128,11 @@ TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) { .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); + EXPECT_THAT(SchemaUtil::Validate(schema, 
GetParam()), IsOk()); } -TEST(SchemaUtilTest, - ValidateJoinablePropertyTypeQualifiedIdShouldHaveStringDataType) { +TEST_P(SchemaUtilTest, + ValidateJoinablePropertyTypeQualifiedIdShouldHaveStringDataType) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty( @@ -1769,7 +3145,7 @@ TEST(SchemaUtilTest, .Build(); // Error if data type is not STRING for qualified id joinable value type. - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); // Passes once we set STRING as the data type. @@ -1782,10 +3158,11 @@ TEST(SchemaUtilTest, /*propagate_delete=*/false) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk()); } -TEST(SchemaUtilTest, ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) { +TEST_P(SchemaUtilTest, + ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty( @@ -1798,7 +3175,7 @@ TEST(SchemaUtilTest, ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) { .Build(); // Error if using REPEATED cardinality for joinable property. - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); // Passes once we use OPTIONAL cardinality with joinable property. @@ -1811,7 +3188,7 @@ TEST(SchemaUtilTest, ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) { /*propagate_delete=*/false) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk()); // Passes once we use REQUIRED cardinality with joinable property. 
schema = SchemaBuilder() @@ -1823,7 +3200,7 @@ TEST(SchemaUtilTest, ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) { /*propagate_delete=*/false) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk()); // Passes once we use REPEATED cardinality with non-joinable property. schema = SchemaBuilder() @@ -1835,11 +3212,11 @@ TEST(SchemaUtilTest, ValidateJoinablePropertyShouldNotHaveRepeatedCardinality) { /*propagate_delete=*/false) .SetCardinality(CARDINALITY_REPEATED))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk()); } -TEST(SchemaUtilTest, - ValidateJoinablePropertyWithDeletePropagationShouldHaveTypeQualifiedId) { +TEST_P(SchemaUtilTest, + ValidateJoinablePropertyWithDeletePropagationShouldHaveTypeQualifiedId) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty( @@ -1853,7 +3230,7 @@ TEST(SchemaUtilTest, // Error if enabling delete propagation with non qualified id joinable value // type. - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); // Passes once we set qualified id joinable value type with delete propagation @@ -1867,7 +3244,7 @@ TEST(SchemaUtilTest, /*propagate_delete=*/true) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk()); // Passes once we disable delete propagation. 
schema = SchemaBuilder() @@ -1879,11 +3256,11 @@ TEST(SchemaUtilTest, /*propagate_delete=*/false) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk()); } -TEST(SchemaUtilTest, - ValidateNestedJoinablePropertyShouldNotHaveNestedRepeatedCardinality) { +TEST_P(SchemaUtilTest, + ValidateNestedJoinablePropertyShouldNotHaveNestedRepeatedCardinality) { // Dependency and nested document property cardinality: // "C" --(REPEATED)--> "B" --(OPTIONAL)--> "A" // where "A" contains joinable property. This should not be allowed. @@ -1909,7 +3286,7 @@ TEST(SchemaUtilTest, /*index_nested_properties=*/false) .SetCardinality(CARDINALITY_REPEATED))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); // Passes once we use non-REPEATED cardinality for "C.b", i.e. the dependency @@ -1936,10 +3313,10 @@ TEST(SchemaUtilTest, /*index_nested_properties=*/false) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk()); } -TEST( +TEST_P( SchemaUtilTest, ValidateNestedJoinablePropertyShouldAllowRepeatedCardinalityIfNoJoinableProperty) { // Dependency and nested document property cardinality: @@ -1979,11 +3356,11 @@ TEST( // Passes since nested schema type with REPEATED cardinality doesn't have // joinable property. 
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk()); } -TEST(SchemaUtilTest, - ValidateNestedJoinablePropertyMultiplePropertiesWithSameSchema) { +TEST_P(SchemaUtilTest, + ValidateNestedJoinablePropertyMultiplePropertiesWithSameSchema) { // Dependency and nested document property cardinality: // --(a1: OPTIONAL)-- // / \ @@ -2015,7 +3392,7 @@ TEST(SchemaUtilTest, /*index_nested_properties=*/false) .SetCardinality(CARDINALITY_REPEATED))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); // Passes once we use non-REPEATED cardinality for "B.a2", i.e. the dependency @@ -2049,10 +3426,10 @@ TEST(SchemaUtilTest, /*index_nested_properties=*/false) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk()); } -TEST(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) { +TEST_P(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) { // Dependency and nested document property cardinality: // B // / \ @@ -2100,7 +3477,7 @@ TEST(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) { /*index_nested_properties=*/false) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk()); // Fails once we change any of edge to REPEATED cardinality. 
// B @@ -2148,7 +3525,7 @@ TEST(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) { /*index_nested_properties=*/false) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); // B @@ -2196,7 +3573,7 @@ TEST(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) { /*index_nested_properties=*/false) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); // B @@ -2244,7 +3621,7 @@ TEST(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) { /*index_nested_properties=*/false) .SetCardinality(CARDINALITY_REPEATED))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); // B @@ -2292,11 +3669,11 @@ TEST(SchemaUtilTest, ValidateNestedJoinablePropertyDiamondRelationship) { /*index_nested_properties=*/false) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) { +TEST_P(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("InnerSchema")) @@ -2316,10 +3693,10 @@ TEST(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) { .SetCardinality(CARDINALITY_REPEATED))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), IsOk()); + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), IsOk()); } -TEST(SchemaUtilTest, InvalidSelfReference) { +TEST_P(SchemaUtilTest, InvalidSelfReference) { // Create a schema with a 
self-reference cycle in it: OwnSchema -> OwnSchema SchemaProto schema = SchemaBuilder() @@ -2333,12 +3710,12 @@ TEST(SchemaUtilTest, InvalidSelfReference) { .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, - HasSubstr("Infinite loop"))); + HasSubstr("Invalid cycle"))); } -TEST(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) { +TEST_P(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) { // Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema SchemaProto schema = SchemaBuilder() @@ -2357,12 +3734,12 @@ TEST(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) { .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, - HasSubstr("Infinite loop"))); + HasSubstr("Invalid cycle"))); } -TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) { +TEST_P(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) { // Create a schema for the outer schema SchemaProto schema = SchemaBuilder() @@ -2379,7 +3756,7 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) { .AddType( SchemaTypeConfigBuilder() .SetType("B") - // Reference the schema A, causing an infinite loop of + // Reference the schema A, causing an invalid cycle of // references. 
.AddProperty(PropertyConfigBuilder() .SetName("NestedDocument") @@ -2389,12 +3766,12 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) { .Build(); // Two degrees of referencing: A -> B -> A - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, - HasSubstr("Infinite loop"))); + HasSubstr("Invalid cycle"))); } -TEST(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) { +TEST_P(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) { SchemaProto schema = SchemaBuilder() // Create a schema for the outer schema @@ -2430,11 +3807,445 @@ TEST(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) { .Build(); // Three degrees of referencing: A -> B -> C -> A - EXPECT_THAT(SchemaUtil::Validate(schema), + EXPECT_THAT(SchemaUtil::Validate(schema, GetParam()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, - HasSubstr("Infinite loop"))); + HasSubstr("Invalid cycle"))); +} + +TEST_P(SchemaUtilTest, ChildMissingOptionalAndRepeatedPropertiesNotOk) { + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build(); + + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + EXPECT_THAT( + SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Property text is not present in child type"))); } +TEST_P(SchemaUtilTest, ChildMissingRequiredPropertyNotOk) { + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_REQUIRED) + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + 
SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder().SetType("B").AddParentType("A").Build(); + + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + EXPECT_THAT( + SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Property text is not present in child type"))); +} + +TEST_P(SchemaUtilTest, ChildCompatiblePropertyOk) { + SchemaTypeConfigProto message_type = + SchemaTypeConfigBuilder() + .SetType("Message") + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_REPEATED) + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .AddProperty(PropertyConfigBuilder() + .SetName("person") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto artist_message_type = + SchemaTypeConfigBuilder() + .SetType("ArtistMessage") + .AddParentType("Message") + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + // OPTIONAL is compatible with REPEATED. + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .AddProperty( + // An extra text is compatible. + PropertyConfigBuilder() + .SetName("extraText") + .SetCardinality(CARDINALITY_REPEATED) + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .AddProperty( + // An extra double is compatible + PropertyConfigBuilder() + .SetName("extraDouble") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataType(TYPE_DOUBLE)) + .AddProperty(PropertyConfigBuilder() + .SetName("person") + // REQUIRED is compatible with OPTIONAL. + .SetCardinality(CARDINALITY_REQUIRED) + // Artist is compatible with Person. 
+ .SetDataTypeDocument( + "Artist", /*index_nested_properties=*/true)) + .Build(); + + SchemaTypeConfigProto person_type = + SchemaTypeConfigBuilder().SetType("Person").Build(); + SchemaTypeConfigProto artist_type = SchemaTypeConfigBuilder() + .SetType("Artist") + .AddParentType("Person") + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(message_type) + .AddType(artist_message_type) + .AddType(person_type) + .AddType(artist_type) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema, GetParam())); + EXPECT_THAT(d_map, SizeIs(3)); + EXPECT_THAT(d_map["Message"], + UnorderedElementsAre(Pair("ArtistMessage", IsEmpty()))); + EXPECT_THAT(d_map["Person"], + UnorderedElementsAre( + Pair("Message", UnorderedElementsAre(Pointee(EqualsProto( + message_type.properties(1))))), + Pair("Artist", IsEmpty()))); + EXPECT_THAT(d_map["Artist"], + UnorderedElementsAre(Pair( + "ArtistMessage", UnorderedElementsAre(Pointee(EqualsProto( + artist_message_type.properties(3))))))); +} + +TEST_P(SchemaUtilTest, ChildIncompatibleCardinalityPropertyNotOk) { + SchemaTypeConfigProto message_type = + SchemaTypeConfigBuilder() + .SetType("Message") + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_REPEATED) + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .AddProperty(PropertyConfigBuilder() + .SetName("person") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto artist_message_type = + SchemaTypeConfigBuilder() + .SetType("ArtistMessage") + .AddParentType("Message") + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .AddProperty( + PropertyConfigBuilder() + .SetName("extraText") + .SetCardinality(CARDINALITY_REPEATED) + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + 
.AddProperty(PropertyConfigBuilder() + .SetName("person") + // Overwrite OPTIONAL to REPEATED is not ok. + .SetCardinality(CARDINALITY_REPEATED) + .SetDataTypeDocument( + "Artist", /*index_nested_properties=*/true)) + .Build(); + + SchemaTypeConfigProto person_type = + SchemaTypeConfigBuilder().SetType("Person").Build(); + SchemaTypeConfigProto artist_type = SchemaTypeConfigBuilder() + .SetType("Artist") + .AddParentType("Person") + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(message_type) + .AddType(artist_message_type) + .AddType(person_type) + .AddType(artist_type) + .Build(); + EXPECT_THAT( + SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Property person from child type ArtistMessage is not " + "compatible to the parent type Message."))); +} + +TEST_P(SchemaUtilTest, ChildIncompatibleDataTypePropertyNotOk) { + SchemaTypeConfigProto message_type = + SchemaTypeConfigBuilder() + .SetType("Message") + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_REPEATED) + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .AddProperty(PropertyConfigBuilder() + .SetName("person") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto artist_message_type = + SchemaTypeConfigBuilder() + .SetType("ArtistMessage") + .AddParentType("Message") + .AddProperty(PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_OPTIONAL) + // Double is not compatible to string. 
+ .SetDataType(TYPE_DOUBLE)) + .AddProperty( + PropertyConfigBuilder() + .SetName("extraText") + .SetCardinality(CARDINALITY_REPEATED) + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .AddProperty(PropertyConfigBuilder() + .SetName("person") + .SetCardinality(CARDINALITY_REQUIRED) + .SetDataTypeDocument( + "Artist", /*index_nested_properties=*/true)) + .Build(); + + SchemaTypeConfigProto person_type = + SchemaTypeConfigBuilder().SetType("Person").Build(); + SchemaTypeConfigProto artist_type = SchemaTypeConfigBuilder() + .SetType("Artist") + .AddParentType("Person") + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(message_type) + .AddType(artist_message_type) + .AddType(person_type) + .AddType(artist_type) + .Build(); + EXPECT_THAT( + SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Property text from child type ArtistMessage is not " + "compatible to the parent type Message."))); +} + +TEST_P(SchemaUtilTest, ChildIncompatibleDocumentTypePropertyNotOk) { + SchemaTypeConfigProto message_type = + SchemaTypeConfigBuilder() + .SetType("Message") + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_REPEATED) + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .AddProperty(PropertyConfigBuilder() + .SetName("person") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument( + "Person", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto artist_message_type = + SchemaTypeConfigBuilder() + .SetType("ArtistMessage") + .AddParentType("Message") + .AddProperty( + PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .AddProperty( + PropertyConfigBuilder() + .SetName("extraText") + .SetCardinality(CARDINALITY_REPEATED) + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)) + .AddProperty( + PropertyConfigBuilder() + 
.SetName("person") + .SetCardinality(CARDINALITY_REQUIRED) + // Artist is not a subtype of Person, thus incompatible + .SetDataTypeDocument("Artist", + /*index_nested_properties=*/true)) + .Build(); + + SchemaTypeConfigProto person_type = + SchemaTypeConfigBuilder().SetType("Person").Build(); + // In this test, Artist is not a subtype of Person. + SchemaTypeConfigProto artist_type = + SchemaTypeConfigBuilder().SetType("Artist").Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(message_type) + .AddType(artist_message_type) + .AddType(person_type) + .AddType(artist_type) + .Build(); + EXPECT_THAT( + SchemaUtil::Validate(schema, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Property person from child type ArtistMessage is not " + "compatible to the parent type Message."))); +} + +TEST_P(SchemaUtilTest, ChildCompatibleMultipleParentPropertyOk) { + SchemaTypeConfigProto email_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("recipient") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaTypeConfigProto message_type = + SchemaTypeConfigBuilder() + .SetType("Message") + .AddProperty( + PropertyConfigBuilder() + .SetName("content") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaTypeConfigProto email_message_type = + SchemaTypeConfigBuilder() + .SetType("EmailMessage") + .AddParentType("Email") + .AddParentType("Message") + .AddProperty( + PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("recipient") + 
.SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("content") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + + SchemaProto schema = SchemaBuilder() + .AddType(email_type) + .AddType(message_type) + .AddType(email_message_type) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependentMap d_map, + SchemaUtil::Validate(schema, GetParam())); + EXPECT_THAT(d_map, SizeIs(2)); + EXPECT_THAT(d_map["Email"], + UnorderedElementsAre(Pair("EmailMessage", IsEmpty()))); + EXPECT_THAT(d_map["Message"], + UnorderedElementsAre(Pair("EmailMessage", IsEmpty()))); +} + +TEST_P(SchemaUtilTest, ChildIncompatibleMultipleParentPropertyNotOk) { + SchemaTypeConfigProto email_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("recipient") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaTypeConfigProto message_type = + SchemaTypeConfigBuilder() + .SetType("Message") + .AddProperty( + PropertyConfigBuilder() + .SetName("content") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + + // Missing the "sender" field from parent "Email", thus incompatible. 
+ SchemaTypeConfigProto email_message_type1 = + SchemaTypeConfigBuilder() + .SetType("EmailMessage") + .AddParentType("Email") + .AddParentType("Message") + .AddProperty( + PropertyConfigBuilder() + .SetName("recipient") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("content") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema1 = SchemaBuilder() + .AddType(email_type) + .AddType(message_type) + .AddType(email_message_type1) + .Build(); + EXPECT_THAT( + SchemaUtil::Validate(schema1, GetParam()), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr( + "Property sender is not present in child type EmailMessage, " + "but it is defined in the parent type Email."))); + + // Missing the "content" field from parent "Message", thus incompatible. + SchemaTypeConfigProto email_message_type2 = + SchemaTypeConfigBuilder() + .SetType("EmailMessage") + .AddParentType("Email") + .AddParentType("Message") + .AddProperty( + PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("recipient") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto schema2 = SchemaBuilder() + .AddType(email_type) + .AddType(message_type) + .AddType(email_message_type2) + .Build(); + EXPECT_THAT( + SchemaUtil::Validate(schema2, GetParam()), + StatusIs( + libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr( + "Property content is not present in child type EmailMessage, " + "but it is defined in the parent type Message."))); +} + +INSTANTIATE_TEST_SUITE_P( + SchemaUtilTest, SchemaUtilTest, + testing::Values(/*allow_circular_schema_definitions=*/true, false)); + } // namespace } // 
namespace lib diff --git a/icing/schema/section.h b/icing/schema/section.h index 65149b9..3685a29 100644 --- a/icing/schema/section.h +++ b/icing/schema/section.h @@ -33,6 +33,8 @@ inline constexpr int kSectionIdBits = 6; inline constexpr SectionId kTotalNumSections = (1 << kSectionIdBits); inline constexpr SectionId kInvalidSectionId = kTotalNumSections; inline constexpr SectionId kMaxSectionId = kTotalNumSections - 1; +// Prior versions of Icing only supported 16 indexed properties. +inline constexpr SectionId kOldTotalNumSections = 16; inline constexpr SectionId kMinSectionId = 0; constexpr bool IsSectionIdValid(SectionId section_id) { return section_id >= kMinSectionId && section_id <= kMaxSectionId; diff --git a/icing/scoring/advanced_scoring/advanced-scorer_test.cc b/icing/scoring/advanced_scoring/advanced-scorer_test.cc index c962bc5..65d4cff 100644 --- a/icing/scoring/advanced_scoring/advanced-scorer_test.cc +++ b/icing/scoring/advanced_scoring/advanced-scorer_test.cc @@ -109,7 +109,9 @@ class AdvancedScorerTest : public testing::Test { .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema)); + ICING_ASSERT_OK(schema_store_->SetSchema( + test_email_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); } void TearDown() override { diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc index 076e36a..ddc21a2 100644 --- a/icing/scoring/score-and-rank_benchmark.cc +++ b/icing/scoring/score-and-rank_benchmark.cc @@ -108,7 +108,8 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) { // Creates file directories Filesystem filesystem; filesystem.DeleteDirectoryRecursively(base_dir.c_str()); - ASSERT_TRUE(filesystem.CreateDirectoryRecursively(document_store_dir.c_str())); + ASSERT_TRUE( + filesystem.CreateDirectoryRecursively(document_store_dir.c_str())); 
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str())); Clock clock; @@ -123,7 +124,9 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) { std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); - ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType())); + ICING_ASSERT_OK(schema_store->SetSchema( + CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ScoringSpecProto scoring_spec; scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); @@ -209,7 +212,8 @@ void BM_ScoreAndRankDocumentHitsByCreationTime(benchmark::State& state) { // Creates file directories Filesystem filesystem; filesystem.DeleteDirectoryRecursively(base_dir.c_str()); - ASSERT_TRUE(filesystem.CreateDirectoryRecursively(document_store_dir.c_str())); + ASSERT_TRUE( + filesystem.CreateDirectoryRecursively(document_store_dir.c_str())); ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str())); Clock clock; @@ -224,7 +228,9 @@ void BM_ScoreAndRankDocumentHitsByCreationTime(benchmark::State& state) { std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); - ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType())); + ICING_ASSERT_OK(schema_store->SetSchema( + CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ScoringSpecProto scoring_spec; scoring_spec.set_rank_by( @@ -313,7 +319,8 @@ void BM_ScoreAndRankDocumentHitsNoScoring(benchmark::State& state) { // Creates file directories Filesystem filesystem; filesystem.DeleteDirectoryRecursively(base_dir.c_str()); - ASSERT_TRUE(filesystem.CreateDirectoryRecursively(document_store_dir.c_str())); + ASSERT_TRUE( + filesystem.CreateDirectoryRecursively(document_store_dir.c_str())); 
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str())); Clock clock; @@ -328,7 +335,9 @@ void BM_ScoreAndRankDocumentHitsNoScoring(benchmark::State& state) { std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); - ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType())); + ICING_ASSERT_OK(schema_store->SetSchema( + CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ScoringSpecProto scoring_spec; scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE); @@ -411,7 +420,8 @@ void BM_ScoreAndRankDocumentHitsByRelevanceScoring(benchmark::State& state) { // Creates file directories Filesystem filesystem; filesystem.DeleteDirectoryRecursively(base_dir.c_str()); - ASSERT_TRUE(filesystem.CreateDirectoryRecursively(document_store_dir.c_str())); + ASSERT_TRUE( + filesystem.CreateDirectoryRecursively(document_store_dir.c_str())); ASSERT_TRUE(filesystem.CreateDirectoryRecursively(schema_store_dir.c_str())); Clock clock; @@ -426,7 +436,9 @@ void BM_ScoreAndRankDocumentHitsByRelevanceScoring(benchmark::State& state) { std::unique_ptr<DocumentStore> document_store = std::move(create_result.document_store); - ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType())); + ICING_ASSERT_OK(schema_store->SetSchema( + CreateSchemaWithEmailType(), /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ScoringSpecProto scoring_spec; scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc index 2649c95..1c7d2ab 100644 --- a/icing/scoring/scorer_test.cc +++ b/icing/scoring/scorer_test.cc @@ -83,7 +83,9 @@ class ScorerTest : public ::testing::TestWithParam<ScorerTestingMode> { .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema)); + 
ICING_ASSERT_OK(schema_store_->SetSchema( + test_email_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); } void TearDown() override { diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc index 5c42236..10f3eb5 100644 --- a/icing/scoring/scoring-processor_test.cc +++ b/icing/scoring/scoring-processor_test.cc @@ -93,7 +93,9 @@ class ScoringProcessorTest .SetDataType(TYPE_STRING) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema)); + ICING_ASSERT_OK(schema_store_->SetSchema( + test_email_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); } void TearDown() override { diff --git a/icing/scoring/section-weights_test.cc b/icing/scoring/section-weights_test.cc index 02205f5..28b1797 100644 --- a/icing/scoring/section-weights_test.cc +++ b/icing/scoring/section-weights_test.cc @@ -87,7 +87,9 @@ class SectionWeightsTest : public testing::Test { SchemaProto schema = SchemaBuilder().AddType(sender_schema).AddType(email_schema).Build(); - ICING_ASSERT_OK(schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); } void TearDown() override { diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index ae8bfc0..b49d0de 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -285,6 +285,45 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create( return create_result; } +/* static */ libtextclassifier3::Status DocumentStore::DiscardDerivedFiles( + const Filesystem* filesystem, const std::string& base_dir) { + // Header + const std::string header_filename = MakeHeaderFilename(base_dir); + if (!filesystem->DeleteFile(MakeHeaderFilename(base_dir).c_str())) { + return 
absl_ports::InternalError("Couldn't delete header file"); + } + + // Document key mapper + ICING_RETURN_IF_ERROR( + DynamicTrieKeyMapper<DocumentId>::Delete(*filesystem, base_dir)); + + // Document id mapper + ICING_RETURN_IF_ERROR(FileBackedVector<int64_t>::Delete( + *filesystem, MakeDocumentIdMapperFilename(base_dir))); + + // Document associated score cache + ICING_RETURN_IF_ERROR(FileBackedVector<DocumentAssociatedScoreData>::Delete( + *filesystem, MakeScoreCacheFilename(base_dir))); + + // Filter cache + ICING_RETURN_IF_ERROR(FileBackedVector<DocumentFilterData>::Delete( + *filesystem, MakeFilterCacheFilename(base_dir))); + + // Namespace mapper + ICING_RETURN_IF_ERROR(DynamicTrieKeyMapper<NamespaceId>::Delete( + *filesystem, MakeNamespaceMapperFilename(base_dir))); + + // Corpus mapper + ICING_RETURN_IF_ERROR(DynamicTrieKeyMapper<CorpusId>::Delete( + *filesystem, MakeCorpusMapperFilename(base_dir))); + + // Corpus associated score cache + ICING_RETURN_IF_ERROR(FileBackedVector<CorpusAssociatedScoreData>::Delete( + *filesystem, MakeCorpusScoreCache(base_dir))); + + return libtextclassifier3::Status::OK; +} + libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( bool force_recovery_and_revalidate_documents, InitializeStatsProto* initialize_stats) { diff --git a/icing/store/document-store.h b/icing/store/document-store.h index 88050ce..3bb04f4 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -146,6 +146,14 @@ class DocumentStore { int32_t compression_level, InitializeStatsProto* initialize_stats); + // Discards all derived data in the document store. + // + // Returns: + // OK on success or nothing to discard + // INTERNAL_ERROR on any I/O errors + static libtextclassifier3::Status DiscardDerivedFiles( + const Filesystem* filesystem, const std::string& base_dir); + // Returns the maximum DocumentId that the DocumentStore has assigned. If // there has not been any DocumentIds assigned, i.e. 
the DocumentStore is // empty, then kInvalidDocumentId is returned. This does not filter out diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc index 99e17c7..61906a9 100644 --- a/icing/store/document-store_benchmark.cc +++ b/icing/store/document-store_benchmark.cc @@ -116,7 +116,9 @@ std::unique_ptr<SchemaStore> CreateSchemaStore(Filesystem filesystem, std::unique_ptr<SchemaStore> schema_store = SchemaStore::Create(&filesystem, schema_store_dir, clock).ValueOrDie(); - auto set_schema_status = schema_store->SetSchema(CreateSchema()); + auto set_schema_status = schema_store->SetSchema( + CreateSchema(), /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false); if (!set_schema_status.ok()) { ICING_LOG(ERROR) << set_schema_status.status().error_message(); } diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index 896d852..146191f 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -189,7 +189,10 @@ class DocumentStoreTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US); ICING_ASSERT_OK_AND_ASSIGN( @@ -198,6 +201,8 @@ class DocumentStoreTest : public ::testing::Test { } void TearDown() override { + lang_segmenter_.reset(); + schema_store_.reset(); filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); } @@ -717,7 +722,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - ICING_ASSERT_OK(schema_store->SetSchema(schema)); + 
ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -844,7 +851,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - ICING_ASSERT_OK(schema_store->SetSchema(schema)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); DocumentId email_document_id; DocumentId message_document_id; @@ -935,7 +944,9 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - ICING_ASSERT_OK(schema_store->SetSchema(schema)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); DocumentId email_document_id; DocumentId message_document_id; @@ -989,7 +1000,8 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { .AddType(SchemaTypeConfigBuilder().SetType("message")) .Build(); ICING_EXPECT_OK(schema_store->SetSchema( - new_schema, /*ignore_errors_and_delete_documents=*/true)); + new_schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false)); // Successfully recover from a corrupt derived file issue. 
ICING_ASSERT_OK_AND_ASSIGN( @@ -1264,11 +1276,16 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) { StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(doc_store->Get(document_id2), IsOkAndHolds(EqualsProto(test_document2_))); + + EXPECT_THAT(doc_store->ReportUsage(CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/2", + /*timestamp_ms=*/0, UsageReport::USAGE_TYPE1)), + IsOk()); } - // "Corrupt" one of the derived files by adding non-checksummed data to - // it. This will mess up the checksum and throw an error on the derived file's - // initialization. + // "Corrupt" one of the derived files by modifying an existing data without + // calling PersistToDisk() or updating its checksum. This will mess up the + // checksum and throw an error on the derived file's initialization. const std::string document_id_mapper_file = absl_ports::StrCat(document_store_dir_, "/document_id_mapper"); ICING_ASSERT_OK_AND_ASSIGN( @@ -1276,13 +1293,14 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) { FileBackedVector<int64_t>::Create( filesystem_, document_id_mapper_file, MemoryMappedFile::READ_WRITE_AUTO_SYNC)); - int64_t corrupt_document_id = 3; - int64_t corrupt_offset = 3; + int64_t corrupt_document_id = 1; + int64_t corrupt_offset = 123456; EXPECT_THAT(document_id_mapper->Set(corrupt_document_id, corrupt_offset), IsOk()); + // Will get error when initializing document id mapper file, so it will + // trigger RegenerateDerivedFiles. // Successfully recover from a corrupt derived file issue. - // NOTE: this doesn't trigger RegenerateDerivedFiles. 
ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, @@ -1304,8 +1322,100 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) { /*namespace_id=*/0, /*schema_type_id=*/0, document2_expiration_timestamp_))); - // Checks derived score cache - note that they aren't regenerated from + // Checks derived score cache + EXPECT_THAT( + doc_store->GetDocumentAssociatedScoreData(document_id2), + IsOkAndHolds(DocumentAssociatedScoreData( + /*corpus_id=*/0, document2_score_, document2_creation_timestamp_, + /*length_in_tokens=*/4))); + EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0), + IsOkAndHolds(CorpusAssociatedScoreData( + /*num_docs=*/1, /*sum_length_in_tokens=*/4))); + + // Checks usage score data - note that they aren't regenerated from // scratch. + UsageStore::UsageScores expected_scores; + expected_scores.usage_type1_count = 1; + ICING_ASSERT_HAS_VALUE_AND_ASSIGN(UsageStore::UsageScores actual_scores, + doc_store->GetUsageScores(document_id2)); + EXPECT_THAT(actual_scores, Eq(expected_scores)); +} + +TEST_F(DocumentStoreTest, ShouldRecoverFromDiscardDerivedFiles) { + DocumentId document_id1, document_id2; + { + // Can put and delete fine. 
+ ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + ICING_ASSERT_OK_AND_ASSIGN( + document_id1, + doc_store->Put(DocumentProto(test_document1_), /*num_tokens=*/4)); + ICING_ASSERT_OK_AND_ASSIGN( + document_id2, + doc_store->Put(DocumentProto(test_document2_), /*num_tokens=*/4)); + EXPECT_THAT(doc_store->Get(document_id1), + IsOkAndHolds(EqualsProto(test_document1_))); + EXPECT_THAT(doc_store->Get(document_id2), + IsOkAndHolds(EqualsProto(test_document2_))); + // Checks derived score cache + EXPECT_THAT( + doc_store->GetDocumentAssociatedScoreData(document_id1), + IsOkAndHolds(DocumentAssociatedScoreData( + /*corpus_id=*/0, document1_score_, document1_creation_timestamp_, + /*length_in_tokens=*/4))); + EXPECT_THAT( + doc_store->GetDocumentAssociatedScoreData(document_id2), + IsOkAndHolds(DocumentAssociatedScoreData( + /*corpus_id=*/0, document2_score_, document2_creation_timestamp_, + /*length_in_tokens=*/4))); + EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0), + IsOkAndHolds(CorpusAssociatedScoreData( + /*num_docs=*/2, /*sum_length_in_tokens=*/8))); + // Delete document 1 + EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk()); + EXPECT_THAT(doc_store->Get(document_id1), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT(doc_store->Get(document_id2), + IsOkAndHolds(EqualsProto(test_document2_))); + + EXPECT_THAT(doc_store->ReportUsage(CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/2", + /*timestamp_ms=*/0, UsageReport::USAGE_TYPE1)), + IsOk()); + } + + // Discard all derived files. + ICING_ASSERT_OK( + DocumentStore::DiscardDerivedFiles(&filesystem_, document_store_dir_)); + + // Successfully recover after discarding all derived files. 
+ ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + EXPECT_THAT(doc_store->Get(document_id1), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT(doc_store->Get(document_id2), + IsOkAndHolds(EqualsProto(test_document2_))); + + // Checks derived filter cache + ICING_ASSERT_HAS_VALUE_AND_ASSIGN( + DocumentFilterData doc_filter_data, + doc_store->GetAliveDocumentFilterData(document_id2)); + EXPECT_THAT(doc_filter_data, + Eq(DocumentFilterData( + /*namespace_id=*/0, + /*schema_type_id=*/0, document2_expiration_timestamp_))); + + // Checks derived score cache. EXPECT_THAT( doc_store->GetDocumentAssociatedScoreData(document_id2), IsOkAndHolds(DocumentAssociatedScoreData( @@ -1313,7 +1423,15 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) { /*length_in_tokens=*/4))); EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0), IsOkAndHolds(CorpusAssociatedScoreData( - /*num_docs=*/2, /*sum_length_in_tokens=*/8))); + /*num_docs=*/1, /*sum_length_in_tokens=*/4))); + + // Checks usage score data - note that they aren't regenerated from + // scratch. 
+ UsageStore::UsageScores expected_scores; + expected_scores.usage_type1_count = 1; + ICING_ASSERT_HAS_VALUE_AND_ASSIGN(UsageStore::UsageScores actual_scores, + doc_store->GetUsageScores(document_id2)); + EXPECT_THAT(actual_scores, Eq(expected_scores)); } TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) { @@ -2177,7 +2295,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) { .AddType(SchemaTypeConfigBuilder().SetType("email")) .AddType(SchemaTypeConfigBuilder().SetType("message")) .Build(); - ICING_EXPECT_OK(schema_store->SetSchema(schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id, schema_store->GetSchemaTypeId("email")); @@ -2232,7 +2352,9 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) { SchemaProto schema = SchemaBuilder() .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ICING_EXPECT_OK(schema_store->SetSchema(schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id, schema_store->GetSchemaTypeId("email")); @@ -2286,7 +2408,9 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - ICING_EXPECT_OK(schema_store->SetSchema(schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id, schema_store->GetSchemaTypeId("email")); @@ -2334,7 +2458,9 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) { 
.AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ICING_EXPECT_OK(schema_store->SetSchema(schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId new_email_schema_type_id, schema_store->GetSchemaTypeId("email")); @@ -2377,7 +2503,9 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - ICING_EXPECT_OK(schema_store->SetSchema(schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // Add two documents, with and without a subject DocumentProto email_without_subject = DocumentBuilder() @@ -2419,7 +2547,8 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) { PropertyConfigProto::Cardinality::REQUIRED); ICING_EXPECT_OK(schema_store->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true)); + schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false)); ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get())); @@ -2448,7 +2577,9 @@ TEST_F(DocumentStoreTest, ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - ICING_EXPECT_OK(schema_store->SetSchema(schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // Add a "email" and "message" document DocumentProto email_document = DocumentBuilder() @@ -2490,7 +2621,8 @@ TEST_F(DocumentStoreTest, ICING_EXPECT_OK( schema_store->SetSchema(new_schema, - /*ignore_errors_and_delete_documents=*/true)); + /*ignore_errors_and_delete_documents=*/true, + 
/*allow_circular_schema_definitions=*/false)); ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get())); @@ -2518,7 +2650,9 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - ICING_EXPECT_OK(schema_store->SetSchema(schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id, schema_store->GetSchemaTypeId("email")); @@ -2566,8 +2700,11 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) { .AddType(SchemaTypeConfigBuilder().SetType("email")) .Build(); - ICING_ASSERT_OK_AND_ASSIGN(SchemaStore::SetSchemaResult set_schema_result, - schema_store->SetSchema(schema)); + ICING_ASSERT_OK_AND_ASSIGN( + SchemaStore::SetSchemaResult set_schema_result, + schema_store->SetSchema(schema, + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId new_email_schema_type_id, schema_store->GetSchemaTypeId("email")); @@ -2611,7 +2748,9 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - ICING_EXPECT_OK(schema_store->SetSchema(schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // Add two documents, with and without a subject DocumentProto email_without_subject = DocumentBuilder() @@ -2655,7 +2794,8 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) { ICING_ASSERT_OK_AND_ASSIGN( SchemaStore::SetSchemaResult set_schema_result, 
schema_store->SetSchema(schema, - /*ignore_errors_and_delete_documents=*/true)); + /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false)); ICING_EXPECT_OK(document_store->OptimizedUpdateSchemaStore( schema_store.get(), set_schema_result)); @@ -2685,7 +2825,9 @@ TEST_F(DocumentStoreTest, ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - ICING_EXPECT_OK(schema_store->SetSchema(schema)); + ICING_EXPECT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); // Add a "email" and "message" document DocumentProto email_document = DocumentBuilder() @@ -2728,7 +2870,8 @@ TEST_F(DocumentStoreTest, ICING_ASSERT_OK_AND_ASSIGN( SchemaStore::SetSchemaResult set_schema_result, schema_store->SetSchema(new_schema, - /*ignore_errors_and_delete_documents=*/true)); + /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false)); ICING_EXPECT_OK(document_store->OptimizedUpdateSchemaStore( schema_store.get(), set_schema_result)); @@ -3475,7 +3618,10 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // The typeid for "email" should be 0. 
ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0)); @@ -3524,7 +3670,10 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) { .SetCardinality(CARDINALITY_OPTIONAL))) .AddType(email_type_config) .Build(); - ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // Adding a new type should cause ids to be reassigned. Ids are assigned in // order of appearance so 'alarm' should be 0 and 'email' should be 1. ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0)); @@ -3578,7 +3727,10 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // The typeid for "email" should be 0. ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0)); @@ -3627,7 +3779,10 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) { .SetCardinality(CARDINALITY_OPTIONAL))) .AddType(email_type_config) .Build(); - ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // Adding a new type should cause ids to be reassigned. Ids are assigned in // order of appearance so 'alarm' should be 0 and 'email' should be 1. 
ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0)); @@ -3673,7 +3828,10 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); DocumentProto docWithBody = DocumentBuilder() @@ -3733,7 +3891,8 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) { .Build(); schema = SchemaBuilder().AddType(email_type_config).Build(); ASSERT_THAT(schema_store->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true), + schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false), IsOk()); { @@ -3782,7 +3941,10 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); - ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); DocumentProto docWithBody = DocumentBuilder() @@ -3842,7 +4004,8 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) { .Build(); schema = SchemaBuilder().AddType(email_type_config).Build(); ASSERT_THAT(schema_store->SetSchema( - schema, /*ignore_errors_and_delete_documents=*/true), + schema, /*ignore_errors_and_delete_documents=*/true, + /*allow_circular_schema_definitions=*/false), IsOk()); { @@ -3889,7 +4052,10 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir, 
&fake_clock_)); - ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); // Create dst directory that we'll initialize the DocumentStore over. std::string document_store_dir = document_store_dir_ + "_migrate"; @@ -4015,7 +4181,9 @@ TEST_F(DocumentStoreTest, GetDebugInfo) { std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); - ICING_ASSERT_OK(schema_store->SetSchema(schema)); + ICING_ASSERT_OK(schema_store->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, diff --git a/icing/testing/numeric/normal-distribution-number-generator.h b/icing/testing/numeric/normal-distribution-number-generator.h new file mode 100644 index 0000000..73cdd1f --- /dev/null +++ b/icing/testing/numeric/normal-distribution-number-generator.h @@ -0,0 +1,42 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ICING_TESTING_NUMERIC_NORMAL_DISTRIBUTION_NUMBER_GENERATOR_H_ +#define ICING_TESTING_NUMERIC_NORMAL_DISTRIBUTION_NUMBER_GENERATOR_H_ + +#include <cmath> +#include <random> + +#include "icing/testing/numeric/number-generator.h" + +namespace icing { +namespace lib { + +template <typename T> +class NormalDistributionNumberGenerator : public NumberGenerator<T> { + public: + explicit NormalDistributionNumberGenerator(int seed, double mean, + double stddev) + : NumberGenerator<T>(seed), distribution_(mean, stddev) {} + + T Generate() override { return std::round(distribution_(this->engine_)); } + + private: + std::normal_distribution<> distribution_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_TESTING_NUMERIC_NORMAL_DISTRIBUTION_NUMBER_GENERATOR_H_ diff --git a/icing/testing/numeric/uniform-distribution-integer-generator.h b/icing/testing/numeric/uniform-distribution-integer-generator.h index 00d8459..569eebd 100644 --- a/icing/testing/numeric/uniform-distribution-integer-generator.h +++ b/icing/testing/numeric/uniform-distribution-integer-generator.h @@ -15,6 +15,8 @@ #ifndef ICING_TESTING_NUMERIC_UNIFORM_DISTRIBUTION_INTEGER_GENERATOR_H_ #define ICING_TESTING_NUMERIC_UNIFORM_DISTRIBUTION_INTEGER_GENERATOR_H_ +#include <random> + #include "icing/testing/numeric/number-generator.h" namespace icing { diff --git a/icing/tokenization/icu/icu-language-segmenter.cc b/icing/tokenization/icu/icu-language-segmenter.cc index 59bcc18..cac12f7 100644 --- a/icing/tokenization/icu/icu-language-segmenter.cc +++ b/icing/tokenization/icu/icu-language-segmenter.cc @@ -375,8 +375,7 @@ void IcuLanguageSegmenter::ReturnBreakIterator(UBreakIterator* itr) const { } libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>> -IcuLanguageSegmenter::Segment(const std::string_view text, - LanguageSegmenter::AccessType) const { +IcuLanguageSegmenter::Segment(const std::string_view text) const { return IcuLanguageSegmenterIterator::Create(this, 
ProduceBreakIterator(), text, locale_); } @@ -385,7 +384,7 @@ libtextclassifier3::StatusOr<std::vector<std::string_view>> IcuLanguageSegmenter::GetAllTerms(const std::string_view text) const { ICING_ASSIGN_OR_RETURN( std::unique_ptr<LanguageSegmenter::Iterator> iterator, - Segment(text, LanguageSegmenter::AccessType::kForwardIterator)); + Segment(text)); std::vector<std::string_view> terms; while (iterator->Advance()) { terms.push_back(iterator->GetTerm()); diff --git a/icing/tokenization/icu/icu-language-segmenter.h b/icing/tokenization/icu/icu-language-segmenter.h index 1ca70c5..44de5a2 100644 --- a/icing/tokenization/icu/icu-language-segmenter.h +++ b/icing/tokenization/icu/icu-language-segmenter.h @@ -64,7 +64,7 @@ class IcuLanguageSegmenter : public LanguageSegmenter { // An iterator of terms on success // INTERNAL_ERROR if any error occurs libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>> - Segment(std::string_view text, LanguageSegmenter::AccessType) const override; + Segment(std::string_view text) const override; // The segmentation depends on the language detected in the input text. // diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc index d1bf5c6..3bacbc6 100644 --- a/icing/tokenization/icu/icu-language-segmenter_test.cc +++ b/icing/tokenization/icu/icu-language-segmenter_test.cc @@ -419,10 +419,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ContinuousWhitespaces) { // iterator is done. 
text_with_spaces = absl_ports::StrCat(std::string(kNumSeparators, ' '), "Hello", " ", "World"); - ICING_ASSERT_OK_AND_ASSIGN( - auto itr, - language_segmenter->Segment( - text_with_spaces, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto itr, + language_segmenter->Segment(text_with_spaces)); std::vector<std::string_view> terms; while (itr->Advance()) { terms.push_back(itr->GetTerm()); @@ -518,10 +516,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToStartUtf32WordConnector) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "com.google.android is package"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "com.google.android is package" // ^ ^^ ^^ @@ -537,10 +533,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, NewIteratorResetToStartUtf32) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -556,10 +550,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText, - 
LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -576,10 +568,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -598,10 +588,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, IteratorDoneResetToStartUtf32) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -619,10 +607,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterUtf32WordConnector) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "package com.google.android name"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "package com.google.android name" // ^ ^^ ^^ @@ -644,10 +630,8 @@ 
TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterUtf32OutOfBounds) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -677,15 +661,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kText)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kText)); std::vector<std::string_view> reset_terms = GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); @@ -701,15 +683,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kThai, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kThai)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment(kThai, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kThai)); std::vector<std::string_view> reset_terms = GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); @@ -725,15 
+705,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kKorean = "나는 매일 출근합니다."; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kKorean, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kKorean)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment(kKorean, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kKorean)); std::vector<std::string_view> reset_terms = GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); @@ -753,15 +731,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kText)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kText)); std::vector<std::string_view> advance_and_reset_terms = GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); @@ -778,15 +754,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kThai, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kThai)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, - segmenter->Segment(kThai, - 
LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kThai)); std::vector<std::string_view> advance_and_reset_terms = GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); @@ -803,15 +777,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kKorean = "나는 매일 출근합니다."; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kKorean, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kKorean)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, - segmenter->Segment(kKorean, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kKorean)); std::vector<std::string_view> advance_and_reset_terms = GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); @@ -828,9 +800,7 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, GetSegmenterOptions(GetLocale(), jni_cache_.get()))); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - "How are you你好吗お元気ですか", - LanguageSegmenter::AccessType::kForwardIterator)); + language_segmenter->Segment("How are you你好吗お元気ですか")); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -867,10 +837,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Multiple continuous whitespaces are treated as one. 
constexpr std::string_view kTextWithSpace = "Hello World"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kTextWithSpace, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kTextWithSpace)); // String: "Hello World" // ^ ^ ^ @@ -909,10 +877,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermAfterUtf32) { // CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that // don't have whitespaces as word delimiter. Chinese constexpr std::string_view kChinese = "我每天走路去上班。"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kChinese, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kChinese)); // String: "我每天走路去上班。" // ^ ^ ^ ^^ ^ // UTF-8 idx: 0 3 9 15 18 24 @@ -938,10 +904,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermAfterUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Japanese constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kJapanese, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kJapanese)); // String: "私は毎日仕事に歩いています。" // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ // UTF-8 idx: 0 3 6 12 18212427 33 39 @@ -966,10 +930,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermAfterUtf32) { language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - 
language_segmenter->Segment( - kKhmer, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kKhmer)); // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" // ^ ^ ^ ^ ^ // UTF-8 idx: 0 9 24 45 69 @@ -995,10 +957,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermAfterUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Thai constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kThai, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kThai)); // String: "ฉันเดินไปทำงานทุกวัน" // ^ ^ ^ ^ ^ ^ // UTF-8 idx: 0 9 21 27 42 51 @@ -1023,10 +983,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "package name com.google.android!"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "package name com.google.android!" 
// ^ ^^ ^^ ^ @@ -1048,10 +1006,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermBeforeOutOfBoundsUtf32) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -1081,15 +1037,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kText)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kText)); std::vector<std::string_view> reset_terms = GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); @@ -1107,15 +1061,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kThai, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kThai)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment(kThai, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kThai)); 
std::vector<std::string_view> reset_terms = GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); @@ -1132,15 +1084,13 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, constexpr std::string_view kKorean = "나는 매일 출근합니다."; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kKorean, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kKorean)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment(kKorean, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kKorean)); std::vector<std::string_view> reset_terms = GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); @@ -1157,9 +1107,7 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, GetSegmenterOptions(GetLocale(), jni_cache_.get()))); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - "How are you你好吗お元気ですか", - LanguageSegmenter::AccessType::kForwardIterator)); + language_segmenter->Segment("How are you你好吗お元気ですか")); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -1197,10 +1145,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Multiple continuous whitespaces are treated as one. 
constexpr std::string_view kTextWithSpace = "Hello World"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kTextWithSpace, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kTextWithSpace)); // String: "Hello World" // ^ ^ ^ @@ -1238,10 +1184,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ChineseResetToTermBeforeUtf32) { // CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that // don't have whitespaces as word delimiter. Chinese constexpr std::string_view kChinese = "我每天走路去上班。"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kChinese, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kChinese)); // String: "我每天走路去上班。" // ^ ^ ^ ^^ // UTF-8 idx: 0 3 9 15 18 @@ -1264,10 +1208,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, JapaneseResetToTermBeforeUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Japanese constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kJapanese, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kJapanese)); // String: "私は毎日仕事に歩いています。" // ^ ^ ^ ^ ^ ^ ^ ^ ^ // UTF-8 idx: 0 3 6 12 18212427 33 @@ -1289,10 +1231,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, KhmerResetToTermBeforeUtf32) { language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - 
language_segmenter->Segment( - kKhmer, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kKhmer)); // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" // ^ ^ ^ ^ // UTF-8 idx: 0 9 24 45 @@ -1315,10 +1255,8 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, ThaiResetToTermBeforeUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Thai constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kThai, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kThai)); // String: "ฉันเดินไปทำงานทุกวัน" // ^ ^ ^ ^ ^ ^ // UTF-8 idx: 0 9 21 27 42 51 @@ -1360,13 +1298,10 @@ TEST_P(IcuLanguageSegmenterAllLocalesTest, MultipleLangSegmentersTest) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> iterator_one, - language_segmenter->Segment( - "foo bar baz", LanguageSegmenter::AccessType::kForwardIterator)); + language_segmenter->Segment("foo bar baz")); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> iterator_two, - language_segmenter->Segment( - "abra kadabra alakazam", - LanguageSegmenter::AccessType::kForwardIterator)); + language_segmenter->Segment("abra kadabra alakazam")); ASSERT_TRUE(iterator_one->Advance()); ASSERT_TRUE(iterator_two->Advance()); diff --git a/icing/tokenization/language-segmenter-iterator_test.cc b/icing/tokenization/language-segmenter-iterator_test.cc index b14ce19..3aff45c 100644 --- a/icing/tokenization/language-segmenter-iterator_test.cc +++ b/icing/tokenization/language-segmenter-iterator_test.cc @@ -54,10 +54,8 @@ TEST_F(LanguageSegmenterIteratorTest, AdvanceAndGetTerm) { ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); - 
ICING_ASSERT_OK_AND_ASSIGN( - auto iterator, - language_segmenter->Segment( - "foo bar", LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto iterator, + language_segmenter->Segment("foo bar")); EXPECT_TRUE(iterator->Advance()); EXPECT_THAT(iterator->GetTerm(), Eq("foo")); @@ -78,10 +76,8 @@ TEST_F(LanguageSegmenterIteratorTest, ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); - ICING_ASSERT_OK_AND_ASSIGN( - auto iterator, - language_segmenter->Segment( - "foo bar", LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto iterator, + language_segmenter->Segment("foo bar")); EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/0), IsOkAndHolds(3)); // The term " " @@ -98,10 +94,8 @@ TEST_F(LanguageSegmenterIteratorTest, ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); - ICING_ASSERT_OK_AND_ASSIGN( - auto iterator, - language_segmenter->Segment( - "foo bar", LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto iterator, + language_segmenter->Segment("foo bar")); EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/-1), IsOk()); @@ -119,10 +113,7 @@ TEST_F(LanguageSegmenterIteratorTest, ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); - ICING_ASSERT_OK_AND_ASSIGN( - auto iterator, - language_segmenter->Segment( - text, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment(text)); EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/text.length()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); @@ -136,10 +127,7 @@ TEST_F(LanguageSegmenterIteratorTest, ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); - 
ICING_ASSERT_OK_AND_ASSIGN( - auto iterator, - language_segmenter->Segment( - text, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment(text)); EXPECT_THAT(iterator->ResetToTermStartingAfterUtf32(/*offset=*/100), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); @@ -152,10 +140,8 @@ TEST_F(LanguageSegmenterIteratorTest, ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); - ICING_ASSERT_OK_AND_ASSIGN( - auto iterator, - language_segmenter->Segment( - "foo bar", LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto iterator, + language_segmenter->Segment("foo bar")); EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/6), IsOkAndHolds(3)); // The term " " @@ -172,10 +158,8 @@ TEST_F(LanguageSegmenterIteratorTest, ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); - ICING_ASSERT_OK_AND_ASSIGN( - auto iterator, - language_segmenter->Segment( - "foo bar", LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto iterator, + language_segmenter->Segment("foo bar")); // Zero is a valid argument, but there aren't any terms that end before it. 
EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/0), @@ -189,10 +173,8 @@ TEST_F(LanguageSegmenterIteratorTest, ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); - ICING_ASSERT_OK_AND_ASSIGN( - auto iterator, - language_segmenter->Segment( - "foo bar", LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto iterator, + language_segmenter->Segment("foo bar")); EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/-1), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); @@ -209,10 +191,7 @@ TEST_F(LanguageSegmenterIteratorTest, ICING_ASSERT_OK_AND_ASSIGN( auto language_segmenter, language_segmenter_factory::Create(std::move(options))); - ICING_ASSERT_OK_AND_ASSIGN( - auto iterator, - language_segmenter->Segment( - text, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto iterator, language_segmenter->Segment(text)); EXPECT_THAT(iterator->ResetToTermEndingBeforeUtf32(/*offset=*/text.length()), IsOk()); diff --git a/icing/tokenization/language-segmenter.h b/icing/tokenization/language-segmenter.h index 83a47d4..913386a 100644 --- a/icing/tokenization/language-segmenter.h +++ b/icing/tokenization/language-segmenter.h @@ -38,11 +38,6 @@ namespace lib { // segmenter->GetAllTerms(text)); class LanguageSegmenter { public: - enum class AccessType { - kForwardIterator, - kBidirectionalIterator, - }; - virtual ~LanguageSegmenter() = default; // An iterator helping to find terms in the input text. @@ -170,7 +165,7 @@ class LanguageSegmenter { // outlives the returned iterator. virtual libtextclassifier3::StatusOr< std::unique_ptr<LanguageSegmenter::Iterator>> - Segment(std::string_view text, AccessType access_type) const = 0; + Segment(std::string_view text) const = 0; // Segments and returns all terms in the input text. 
// diff --git a/icing/tokenization/language-segmenter_benchmark.cc b/icing/tokenization/language-segmenter_benchmark.cc index 50c625e..748a322 100644 --- a/icing/tokenization/language-segmenter_benchmark.cc +++ b/icing/tokenization/language-segmenter_benchmark.cc @@ -68,10 +68,7 @@ void BM_SegmentNoSpace(benchmark::State& state) { for (auto _ : state) { std::unique_ptr<LanguageSegmenter::Iterator> iterator = - language_segmenter - ->Segment(input_string, - LanguageSegmenter::AccessType::kForwardIterator) - .ValueOrDie(); + language_segmenter->Segment(input_string).ValueOrDie(); while (iterator->Advance()) { iterator->GetTerm(); } @@ -111,10 +108,7 @@ void BM_SegmentWithSpaces(benchmark::State& state) { for (auto _ : state) { std::unique_ptr<LanguageSegmenter::Iterator> iterator = - language_segmenter - ->Segment(input_string, - LanguageSegmenter::AccessType::kForwardIterator) - .ValueOrDie(); + language_segmenter->Segment(input_string).ValueOrDie(); while (iterator->Advance()) { iterator->GetTerm(); } @@ -154,10 +148,7 @@ void BM_SegmentCJK(benchmark::State& state) { for (auto _ : state) { std::unique_ptr<LanguageSegmenter::Iterator> iterator = - language_segmenter - ->Segment(input_string, - LanguageSegmenter::AccessType::kForwardIterator) - .ValueOrDie(); + language_segmenter->Segment(input_string).ValueOrDie(); while (iterator->Advance()) { iterator->GetTerm(); } diff --git a/icing/tokenization/plain-tokenizer.cc b/icing/tokenization/plain-tokenizer.cc index 9175f3a..d40022b 100644 --- a/icing/tokenization/plain-tokenizer.cc +++ b/icing/tokenization/plain-tokenizer.cc @@ -130,19 +130,17 @@ class PlainTokenIterator : public Tokenizer::Iterator { }; libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> -PlainTokenizer::Tokenize(std::string_view text, - LanguageSegmenter::AccessType access_type) const { +PlainTokenizer::Tokenize(std::string_view text) const { ICING_ASSIGN_OR_RETURN( std::unique_ptr<LanguageSegmenter::Iterator> base_iterator, - 
language_segmenter_.Segment(text, access_type)); + language_segmenter_.Segment(text)); return std::make_unique<PlainTokenIterator>(std::move(base_iterator)); } libtextclassifier3::StatusOr<std::vector<Token>> PlainTokenizer::TokenizeAll( std::string_view text) const { - ICING_ASSIGN_OR_RETURN( - std::unique_ptr<Tokenizer::Iterator> iterator, - Tokenize(text, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator, + Tokenize(text)); std::vector<Token> tokens; while (iterator->Advance()) { std::vector<Token> batch_tokens = iterator->GetTokens(); diff --git a/icing/tokenization/plain-tokenizer.h b/icing/tokenization/plain-tokenizer.h index 61a8b5a..25b40fd 100644 --- a/icing/tokenization/plain-tokenizer.h +++ b/icing/tokenization/plain-tokenizer.h @@ -33,8 +33,7 @@ class PlainTokenizer : public Tokenizer { : language_segmenter_(*language_segmenter) {} libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize( - std::string_view text, - LanguageSegmenter::AccessType access_type) const override; + std::string_view text) const override; libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll( std::string_view text) const override; diff --git a/icing/tokenization/plain-tokenizer_test.cc b/icing/tokenization/plain-tokenizer_test.cc index f94a558..6c426da 100644 --- a/icing/tokenization/plain-tokenizer_test.cc +++ b/icing/tokenization/plain-tokenizer_test.cc @@ -25,7 +25,6 @@ #include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" #include "icing/tokenization/language-segmenter-factory.h" -#include "icing/tokenization/language-segmenter.h" #include "icing/tokenization/tokenizer-factory.h" #include "unicode/uloc.h" @@ -68,10 +67,8 @@ TEST_F(PlainTokenizerTest, NoTokensBeforeAdvancing) { language_segmenter.get())); constexpr std::string_view kText = "Hello, world!"; - ICING_ASSERT_OK_AND_ASSIGN( - auto token_iterator, - plain_tokenizer->Tokenize( - kText, 
LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto token_iterator, + plain_tokenizer->Tokenize(kText)); // We should get no tokens if we get the token before advancing. EXPECT_THAT(token_iterator->GetTokens(), IsEmpty()); @@ -89,10 +86,8 @@ TEST_F(PlainTokenizerTest, LastTokenAfterFullyAdvanced) { language_segmenter.get())); constexpr std::string_view kText = "Hello, world!"; - ICING_ASSERT_OK_AND_ASSIGN( - auto token_iterator, - plain_tokenizer->Tokenize( - kText, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto token_iterator, + plain_tokenizer->Tokenize(kText)); while (token_iterator->Advance()) {} @@ -349,10 +344,7 @@ TEST_F(PlainTokenizerTest, ResetToTokenStartingAfterSimple) { language_segmenter.get())); constexpr std::string_view kText = "f b"; - auto iterator = - plain_tokenizer - ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator) - .ValueOrDie(); + auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie(); EXPECT_TRUE(iterator->ResetToTokenStartingAfter(0)); EXPECT_THAT(iterator->GetTokens(), @@ -373,10 +365,7 @@ TEST_F(PlainTokenizerTest, ResetToTokenEndingBeforeSimple) { language_segmenter.get())); constexpr std::string_view kText = "f b"; - auto iterator = - plain_tokenizer - ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator) - .ValueOrDie(); + auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie(); EXPECT_TRUE(iterator->ResetToTokenEndingBefore(2)); EXPECT_THAT(iterator->GetTokens(), @@ -423,10 +412,7 @@ TEST_F(PlainTokenizerTest, ResetToTokenStartingAfter) { "bat", // 16: " bat" }; - auto iterator = - plain_tokenizer - ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator) - .ValueOrDie(); + auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie(); EXPECT_TRUE(iterator->Advance()); EXPECT_THAT(iterator->GetTokens(), ElementsAre(EqualsToken(Token::Type::REGULAR, "foo"))); @@ -480,10 +466,7 @@ 
TEST_F(PlainTokenizerTest, ResetToTokenEndingBefore) { "foo", // 4: "foo " }; - auto iterator = - plain_tokenizer - ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator) - .ValueOrDie(); + auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie(); EXPECT_TRUE(iterator->Advance()); EXPECT_THAT(iterator->GetTokens(), ElementsAre(EqualsToken(Token::Type::REGULAR, "foo"))); diff --git a/icing/tokenization/raw-query-tokenizer.cc b/icing/tokenization/raw-query-tokenizer.cc index aca317c..1dcbf9b 100644 --- a/icing/tokenization/raw-query-tokenizer.cc +++ b/icing/tokenization/raw-query-tokenizer.cc @@ -690,8 +690,7 @@ class RawQueryTokenIterator : public Tokenizer::Iterator { } // namespace libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> -RawQueryTokenizer::Tokenize(std::string_view text, - LanguageSegmenter::AccessType) const { +RawQueryTokenizer::Tokenize(std::string_view text) const { ICING_ASSIGN_OR_RETURN(std::vector<Token> tokens, TokenizeAll(text)); return std::make_unique<RawQueryTokenIterator>(std::move(tokens)); } diff --git a/icing/tokenization/raw-query-tokenizer.h b/icing/tokenization/raw-query-tokenizer.h index 1087b04..6316e45 100644 --- a/icing/tokenization/raw-query-tokenizer.h +++ b/icing/tokenization/raw-query-tokenizer.h @@ -33,7 +33,7 @@ class RawQueryTokenizer : public Tokenizer { : language_segmenter_(*language_segmenter) {} libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize( - std::string_view text, LanguageSegmenter::AccessType) const override; + std::string_view text) const override; libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll( std::string_view text) const override; diff --git a/icing/tokenization/raw-query-tokenizer_test.cc b/icing/tokenization/raw-query-tokenizer_test.cc index 2044f95..a00f2f7 100644 --- a/icing/tokenization/raw-query-tokenizer_test.cc +++ b/icing/tokenization/raw-query-tokenizer_test.cc @@ -21,7 +21,6 @@ #include 
"icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/tokenization/language-segmenter-factory.h" -#include "icing/tokenization/language-segmenter.h" #include "icing/tokenization/tokenizer-factory.h" #include "icing/tokenization/tokenizer.h" #include "unicode/uloc.h" @@ -61,10 +60,8 @@ TEST_F(RawQueryTokenizerTest, NoTokensBeforeAdvancing) { language_segmenter.get())); constexpr std::string_view kText = "Hello, world!"; - ICING_ASSERT_OK_AND_ASSIGN( - auto token_iterator, - raw_query_tokenizer->Tokenize( - kText, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto token_iterator, + raw_query_tokenizer->Tokenize(kText)); // We should get no tokens if we get the token before advancing. EXPECT_THAT(token_iterator->GetTokens(), IsEmpty()); diff --git a/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc index 4bb7991..dbd7f5a 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc +++ b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.cc @@ -31,13 +31,18 @@ namespace icing { namespace lib { +namespace { +// Chosen based on results in go/reverse-jni-benchmarks +static constexpr int kBatchSize = 100; +} // namespace + // ----------------------------------------------------------------------------- // Implementations that call out to JVM. Behold the beauty. 
// ----------------------------------------------------------------------------- libtextclassifier3::StatusOr<std::unique_ptr<ReverseJniBreakIterator>> ReverseJniBreakIterator::Create(const JniCache* jni_cache, - std::string_view text, std::string_view locale, - int batch_size) { + std::string_view text, + std::string_view locale) { if (jni_cache == nullptr) { return absl_ports::InvalidArgumentError( "Create must be called with a valid JniCache pointer!"); @@ -85,17 +90,15 @@ ReverseJniBreakIterator::Create(const JniCache* jni_cache, ICING_RETURN_IF_ERROR(libtextclassifier3::JniHelper::CallVoidMethod( jenv, iterator_batcher.get(), jni_cache->breakiterator_settext, java_text.get())); - return std::unique_ptr<ReverseJniBreakIterator>(new ReverseJniBreakIterator( - jni_cache, std::move(iterator_batcher), batch_size)); + return std::unique_ptr<ReverseJniBreakIterator>( + new ReverseJniBreakIterator(jni_cache, std::move(iterator_batcher))); } ReverseJniBreakIterator::ReverseJniBreakIterator( const JniCache* jni_cache, - libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher, - int batch_size) + libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher) : jni_cache_(jni_cache), iterator_batcher_(std::move(iterator_batcher)), - batch_size_(batch_size), is_done_(false), is_almost_done_(false) {} @@ -110,7 +113,7 @@ int ReverseJniBreakIterator::Next() { is_done_ = true; return ReverseJniBreakIterator::kDone; } - is_almost_done_ = break_indices_cache_.size() < batch_size_; + is_almost_done_ = break_indices_cache_.size() < kBatchSize; } int break_index = break_indices_cache_.front(); break_indices_cache_.pop(); @@ -153,7 +156,7 @@ int ReverseJniBreakIterator::FetchNextBatch() { libtextclassifier3::ScopedLocalRef<jintArray> break_indices, libtextclassifier3::JniHelper::CallObjectMethod<jintArray>( jni_cache_->GetEnv(), iterator_batcher_.get(), - jni_cache_->breakiterator_next, batch_size_), + jni_cache_->breakiterator_next, kBatchSize), 
ReverseJniBreakIterator::kDone); if (break_indices == nullptr || jni_cache_->ExceptionCheckAndClear()) { return ReverseJniBreakIterator::kDone; diff --git a/icing/tokenization/reverse_jni/reverse-jni-break-iterator.h b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.h index b1dcc87..537666c 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-break-iterator.h +++ b/icing/tokenization/reverse_jni/reverse-jni-break-iterator.h @@ -44,9 +44,6 @@ namespace lib { // EXPECT_THAT(nexts, ElementsAre(1, 3, 5, 6, 8)); class ReverseJniBreakIterator { public: - // Chosen based on results in go/reverse-jni-benchmarks - static constexpr int kBatchSize = 100; - static constexpr int kDone = -1; // Creates a ReverseJniBreakiterator with the given text and locale. @@ -57,7 +54,7 @@ class ReverseJniBreakIterator { // INTERNAL if unable to create any of the required Java objects static libtextclassifier3::StatusOr<std::unique_ptr<ReverseJniBreakIterator>> Create(const JniCache* jni_cache, std::string_view text, - std::string_view locale, int batch_size); + std::string_view locale); // Returns the UTF-16 boundary following the current boundary. If the current // boundary is the last text boundary, it returns @@ -91,10 +88,9 @@ class ReverseJniBreakIterator { private: ReverseJniBreakIterator( const JniCache* jni_cache, - libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher, - int batch_size); + libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher); - // Fetches the results of up to batch_size next calls and stores them in + // Fetches the results of up to kBatchSize next calls and stores them in // break_indices_cache_. Returns the number of results or kDone if no more // results could be fetched. int FetchNextBatch(); @@ -113,11 +109,9 @@ class ReverseJniBreakIterator { // BreakIteratorBatcher#next. std::queue<int> break_indices_cache_; - int batch_size_; - bool is_done_; - // The last batch was incomplete (< batch_size_ results were returned). 
The + // The last batch was incomplete (< kBatchSize results were returned). The // next call to BreakIteratorBatcher#next is guaranteed to return an // empty array. Once the results from the last batch are evicted from // break_indices_cache, ReverseJniBreakIterator will transition to is_done_. diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc index e6bcf4b..bd80718 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc +++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc @@ -293,28 +293,18 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { }; libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>> -ReverseJniLanguageSegmenter::Segment( - const std::string_view text, - LanguageSegmenter::AccessType access_type) const { - // Only batch if we're only doing forward iteration. Bidirectional iteration - // will result in us frequently discarding unconsumed batched word breaks. - // Therefore, we won't bother batching them. - int batch_size = - (access_type == LanguageSegmenter::AccessType::kForwardIterator) - ? 
ReverseJniBreakIterator::kBatchSize - : 1; +ReverseJniLanguageSegmenter::Segment(const std::string_view text) const { ICING_ASSIGN_OR_RETURN( std::unique_ptr<ReverseJniBreakIterator> break_iterator, - ReverseJniBreakIterator::Create(jni_cache_, text, locale_, batch_size)); + ReverseJniBreakIterator::Create(jni_cache_, text, locale_)); return std::make_unique<ReverseJniLanguageSegmenterIterator>( text, std::move(break_iterator)); } libtextclassifier3::StatusOr<std::vector<std::string_view>> ReverseJniLanguageSegmenter::GetAllTerms(const std::string_view text) const { - ICING_ASSIGN_OR_RETURN( - std::unique_ptr<LanguageSegmenter::Iterator> iterator, - Segment(text, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSIGN_OR_RETURN(std::unique_ptr<LanguageSegmenter::Iterator> iterator, + Segment(text)); std::vector<std::string_view> terms; while (iterator->Advance()) { terms.push_back(iterator->GetTerm()); diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h index e9f84ad..29df4ee 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h +++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.h @@ -34,8 +34,7 @@ class ReverseJniLanguageSegmenter : public LanguageSegmenter { : locale_(std::move(locale)), jni_cache_(jni_cache) {} libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>> - Segment(std::string_view text, - LanguageSegmenter::AccessType access_type) const override; + Segment(std::string_view text) const override; libtextclassifier3::StatusOr<std::vector<std::string_view>> GetAllTerms( std::string_view text) const override; diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc index be652ff..47a01fe 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc +++ 
b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc @@ -394,10 +394,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ContinuousWhitespaces) { // iterator is done. text_with_spaces = absl_ports::StrCat(std::string(kNumSeparators, ' '), "Hello", " ", "World"); - ICING_ASSERT_OK_AND_ASSIGN( - auto itr, - language_segmenter->Segment( - text_with_spaces, LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSERT_OK_AND_ASSIGN(auto itr, + language_segmenter->Segment(text_with_spaces)); std::vector<std::string_view> terms; while (itr->Advance()) { terms.push_back(itr->GetTerm()); @@ -493,10 +491,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToStartUtf32WordConnector) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "com:google:android is package"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment( - kText, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "com:google:android is package" // ^ ^^ ^^ @@ -512,10 +508,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, NewIteratorResetToStartUtf32) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment( - kText, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -530,10 +524,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, IteratorOneAdvanceResetToStartUtf32) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), 
jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment( - kText, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -550,10 +542,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment( - kText, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -572,10 +562,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, IteratorDoneResetToStartUtf32) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment( - kText, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -593,10 +581,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterUtf32WordConnector) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "package com:google:android name"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment( - kText, 
LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "package com:google:android name" // ^ ^^ ^^ @@ -618,10 +604,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterUtf32OutOfBounds) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment( - kText, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -651,15 +635,13 @@ TEST_P(ReverseJniLanguageSegmenterTest, constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kText)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment( - kText, LanguageSegmenter::AccessType::kBidirectionalIterator)); + segmenter->Segment(kText)); std::vector<std::string_view> reset_terms = GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); @@ -675,15 +657,13 @@ TEST_P(ReverseJniLanguageSegmenterTest, constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kThai, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kThai)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( 
std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment( - kThai, LanguageSegmenter::AccessType::kBidirectionalIterator)); + segmenter->Segment(kThai)); std::vector<std::string_view> reset_terms = GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); @@ -699,15 +679,13 @@ TEST_P(ReverseJniLanguageSegmenterTest, constexpr std::string_view kKorean = "나는 매일 출근합니다."; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kKorean, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kKorean)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment( - kKorean, LanguageSegmenter::AccessType::kBidirectionalIterator)); + segmenter->Segment(kKorean)); std::vector<std::string_view> reset_terms = GetAllTermsResetAfterUtf32(reset_to_term_itr.get()); @@ -727,15 +705,13 @@ TEST_P(ReverseJniLanguageSegmenterTest, constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kText)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, - segmenter->Segment( - kText, LanguageSegmenter::AccessType::kBidirectionalIterator)); + segmenter->Segment(kText)); std::vector<std::string_view> advance_and_reset_terms = GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); @@ -752,15 +728,13 @@ TEST_P(ReverseJniLanguageSegmenterTest, constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kThai, - 
LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kThai)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, - segmenter->Segment( - kThai, LanguageSegmenter::AccessType::kBidirectionalIterator)); + segmenter->Segment(kThai)); std::vector<std::string_view> advance_and_reset_terms = GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); @@ -777,15 +751,13 @@ TEST_P(ReverseJniLanguageSegmenterTest, constexpr std::string_view kKorean = "나는 매일 출근합니다."; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kKorean, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kKorean)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_and_reset_itr, - segmenter->Segment( - kKorean, LanguageSegmenter::AccessType::kBidirectionalIterator)); + segmenter->Segment(kKorean)); std::vector<std::string_view> advance_and_reset_terms = GetAllTermsAdvanceAndResetAfterUtf32(advance_and_reset_itr.get()); @@ -801,9 +773,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermAfterUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - "How are you你好吗お元気ですか", - LanguageSegmenter::AccessType::kBidirectionalIterator)); + language_segmenter->Segment("How are you你好吗お元気ですか")); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -840,11 +810,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Multiple continuous whitespaces are treated as one. 
constexpr std::string_view kTextWithSpace = "Hello World"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kTextWithSpace, - LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kTextWithSpace)); // String: "Hello World" // ^ ^ ^ @@ -883,10 +850,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermAfterUtf32) { // CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that // don't have whitespaces as word delimiter. Chinese constexpr std::string_view kChinese = "我每天走路去上班。"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kChinese, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kChinese)); // String: "我每天走路去上班。" // ^ ^ ^ ^^ ^ // UTF-8 idx: 0 3 9 15 18 24 @@ -912,10 +877,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermAfterUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Japanese constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kJapanese, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kJapanese)); // String: "私は毎日仕事に歩いています。" // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ // UTF-8 idx: 0 3 6 12 18212427 33 39 @@ -940,10 +903,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermAfterUtf32) { language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - 
language_segmenter->Segment( - kKhmer, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kKhmer)); // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" // ^ ^ ^ ^ ^ // UTF-8 idx: 0 9 24 45 69 @@ -969,10 +930,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermAfterUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Thai constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kThai, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kThai)); // String: "ฉันเดินไปทำงานทุกวัน" // ^ ^ ^ ^ ^ ^ // UTF-8 idx: 0 9 21 27 42 51 @@ -996,10 +955,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeWordConnectorUtf32) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "package name com:google:android!"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment( - kText, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "package name com:google:android!" 
// ^ ^^ ^^ ^ @@ -1021,10 +978,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeOutOfBoundsUtf32) { auto segmenter, language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kText = "How are you你好吗お元気ですか"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - segmenter->Segment( - kText, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + segmenter->Segment(kText)); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -1054,15 +1009,13 @@ TEST_P(ReverseJniLanguageSegmenterTest, constexpr std::string_view kText = "How are𡔖 you你好吗お元気ですか"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kText, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kText)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment( - kText, LanguageSegmenter::AccessType::kBidirectionalIterator)); + segmenter->Segment(kText)); std::vector<std::string_view> reset_terms = GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); @@ -1080,15 +1033,13 @@ TEST_P(ReverseJniLanguageSegmenterTest, constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kThai, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kThai)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment( - kThai, LanguageSegmenter::AccessType::kBidirectionalIterator)); + segmenter->Segment(kThai)); 
std::vector<std::string_view> reset_terms = GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); @@ -1105,15 +1056,13 @@ TEST_P(ReverseJniLanguageSegmenterTest, constexpr std::string_view kKorean = "나는 매일 출근합니다."; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> advance_itr, - segmenter->Segment(kKorean, - LanguageSegmenter::AccessType::kForwardIterator)); + segmenter->Segment(kKorean)); std::vector<std::string_view> advance_terms = GetAllTermsAdvance(advance_itr.get()); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> reset_to_term_itr, - segmenter->Segment( - kKorean, LanguageSegmenter::AccessType::kBidirectionalIterator)); + segmenter->Segment(kKorean)); std::vector<std::string_view> reset_terms = GetAllTermsResetBeforeUtf32(reset_to_term_itr.get()); std::reverse(reset_terms.begin(), reset_terms.end()); @@ -1129,9 +1078,7 @@ TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguagesResetToTermBeforeUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - "How are you你好吗お元気ですか", - LanguageSegmenter::AccessType::kBidirectionalIterator)); + language_segmenter->Segment("How are you你好吗お元気ですか")); // String: "How are you你好吗お元気ですか" // ^ ^^ ^^ ^ ^ ^ ^ ^ ^ @@ -1169,11 +1116,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Multiple continuous whitespaces are treated as one. 
constexpr std::string_view kTextWithSpace = "Hello World"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kTextWithSpace, - LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kTextWithSpace)); // String: "Hello World" // ^ ^ ^ @@ -1211,10 +1155,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ChineseResetToTermBeforeUtf32) { // CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that // don't have whitespaces as word delimiter. Chinese constexpr std::string_view kChinese = "我每天走路去上班。"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kChinese, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kChinese)); // String: "我每天走路去上班。" // ^ ^ ^ ^^ // UTF-8 idx: 0 3 9 15 18 @@ -1237,10 +1179,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, JapaneseResetToTermBeforeUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Japanese constexpr std::string_view kJapanese = "私は毎日仕事に歩いています。"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kJapanese, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kJapanese)); // String: "私は毎日仕事に歩いています。" // ^ ^ ^ ^ ^ ^ ^ ^ ^ // UTF-8 idx: 0 3 6 12 18212427 33 @@ -1262,10 +1202,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, KhmerResetToTermBeforeUtf32) { language_segmenter_factory::Create( GetSegmenterOptions(GetLocale(), jni_cache_.get()))); constexpr std::string_view kKhmer = "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - 
language_segmenter->Segment( - kKhmer, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kKhmer)); // String: "ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។" // ^ ^ ^ ^ // UTF-8 idx: 0 9 24 45 @@ -1288,10 +1226,8 @@ TEST_P(ReverseJniLanguageSegmenterTest, ThaiResetToTermBeforeUtf32) { GetSegmenterOptions(GetLocale(), jni_cache_.get()))); // Thai constexpr std::string_view kThai = "ฉันเดินไปทำงานทุกวัน"; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<LanguageSegmenter::Iterator> itr, - language_segmenter->Segment( - kThai, LanguageSegmenter::AccessType::kBidirectionalIterator)); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr, + language_segmenter->Segment(kThai)); // String: "ฉันเดินไปทำงานทุกวัน" // ^ ^ ^ ^ ^ ^ // UTF-8 idx: 0 9 21 27 42 51 diff --git a/icing/tokenization/rfc822-tokenizer.cc b/icing/tokenization/rfc822-tokenizer.cc index 35b82ca..13c58c5 100644 --- a/icing/tokenization/rfc822-tokenizer.cc +++ b/icing/tokenization/rfc822-tokenizer.cc @@ -778,15 +778,14 @@ class Rfc822TokenIterator : public Tokenizer::Iterator { }; libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> -Rfc822Tokenizer::Tokenize(std::string_view text, - LanguageSegmenter::AccessType) const { +Rfc822Tokenizer::Tokenize(std::string_view text) const { return std::make_unique<Rfc822TokenIterator>(text); } libtextclassifier3::StatusOr<std::vector<Token>> Rfc822Tokenizer::TokenizeAll( std::string_view text) const { - std::unique_ptr<Tokenizer::Iterator> iterator = - std::make_unique<Rfc822TokenIterator>(text); + ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator, + Tokenize(text)); std::vector<Token> tokens; while (iterator->Advance()) { std::vector<Token> batch_tokens = iterator->GetTokens(); diff --git a/icing/tokenization/rfc822-tokenizer.h b/icing/tokenization/rfc822-tokenizer.h index 094f1cf..09e4624 100644 --- 
a/icing/tokenization/rfc822-tokenizer.h +++ b/icing/tokenization/rfc822-tokenizer.h @@ -17,7 +17,6 @@ #include <vector> -#include "icing/tokenization/language-segmenter.h" #include "icing/tokenization/tokenizer.h" namespace icing { @@ -26,7 +25,7 @@ namespace lib { class Rfc822Tokenizer : public Tokenizer { public: libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize( - std::string_view text, LanguageSegmenter::AccessType) const override; + std::string_view text) const override; libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll( std::string_view text) const override; diff --git a/icing/tokenization/rfc822-tokenizer_test.cc b/icing/tokenization/rfc822-tokenizer_test.cc index e1a7fc8..ee3a95d 100644 --- a/icing/tokenization/rfc822-tokenizer_test.cc +++ b/icing/tokenization/rfc822-tokenizer_test.cc @@ -21,7 +21,6 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/testing/common-matchers.h" -#include "icing/tokenization/language-segmenter.h" namespace icing { namespace lib { @@ -32,10 +31,7 @@ using ::testing::IsEmpty; TEST(Rfc822TokenizerTest, StartingState) { Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer(); std::string text = "a@g.c"; - auto token_iterator = - rfc822_tokenizer - .Tokenize(text, LanguageSegmenter::AccessType::kForwardIterator) - .ValueOrDie(); + auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie(); ASSERT_THAT(token_iterator->GetTokens(), IsEmpty()); ASSERT_TRUE(token_iterator->Advance()); @@ -966,10 +962,7 @@ TEST(Rfc822TokenizerTest, Commas) { TEST(Rfc822TokenizerTest, ResetToTokenStartingAfter) { Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer(); std::string text = "a@g.c,b@g.c"; - auto token_iterator = - rfc822_tokenizer - .Tokenize(text, LanguageSegmenter::AccessType::kBidirectionalIterator) - .ValueOrDie(); + auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie(); ASSERT_TRUE(token_iterator->Advance()); ASSERT_TRUE(token_iterator->Advance()); @@ -985,10 +978,7 
@@ TEST(Rfc822TokenizerTest, ResetToTokenStartingAfter) { TEST(Rfc822TokenizerTest, ResetToTokenEndingBefore) { Rfc822Tokenizer rfc822_tokenizer = Rfc822Tokenizer(); std::string text = "a@g.c,b@g.c"; - auto token_iterator = - rfc822_tokenizer - .Tokenize(text, LanguageSegmenter::AccessType::kBidirectionalIterator) - .ValueOrDie(); + auto token_iterator = rfc822_tokenizer.Tokenize(text).ValueOrDie(); token_iterator->Advance(); ASSERT_TRUE(token_iterator->ResetToTokenEndingBefore(5)); diff --git a/icing/tokenization/tokenizer.h b/icing/tokenization/tokenizer.h index 3336266..fb7613f 100644 --- a/icing/tokenization/tokenizer.h +++ b/icing/tokenization/tokenizer.h @@ -22,7 +22,6 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" -#include "icing/tokenization/language-segmenter.h" #include "icing/tokenization/token.h" #include "icing/util/character-iterator.h" @@ -34,10 +33,8 @@ namespace lib { // iterator or a list of tokens. Example usage: // // std::unique_ptr<Tokenizer> tokenizer = GetTokenizer(); -// ICING_ASSIGN_OR_RETURN( -// std::unique_ptr<Tokenizer::Iterator> iter, -// tokenizer->Tokenize(text, -// LanguageSegmenter::AccessType::kForwardIterator)); +// ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iter, +// tokenizer->Tokenize(text)); // ICING_ASSIGN_OR_RETURN(std::vector<Token> tokens, // tokenizer->TokenizeAll(text)); class Tokenizer { @@ -79,10 +76,7 @@ class Tokenizer { // offset. Returns false if there are no valid tokens starting after // offset. // Ex. - // auto iterator = - // tokenizer.Tokenize("foo bar baz", - // LanguageSegmenter::AccessType::kForwardIterator) - // .ValueOrDie(); + // auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie(); // iterator.ResetToTokenStartingAfter(4); // // The first full token starting after position 4 (the 'b' in "bar") is // // "baz". @@ -95,10 +89,8 @@ class Tokenizer { // offset. 
Returns false if there are no valid tokens ending // before offset. // Ex. - // auto iterator = - // tokenizer.Tokenize("foo bar baz", - // LanguageSegmenter::AccessType::kForwardIterator) - // .ValueOrDie(); // iterator.ResetToTokenEndingBefore(4); + // auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie(); + // iterator.ResetToTokenEndingBefore(4); // // The first full token ending before position 4 (the 'b' in "bar") is // // "foo". // PrintToken(iterator.GetToken()); // prints "foo" @@ -119,8 +111,7 @@ class Tokenizer { // types. // INTERNAL_ERROR if any other errors occur virtual libtextclassifier3::StatusOr<std::unique_ptr<Iterator>> Tokenize( - std::string_view text, - LanguageSegmenter::AccessType access_type) const = 0; + std::string_view text) const = 0; // Tokenizes and returns all tokens in the input text. The input text should // outlive the returned vector. diff --git a/icing/tokenization/verbatim-tokenizer.cc b/icing/tokenization/verbatim-tokenizer.cc index cf6d5e3..9ca611d 100644 --- a/icing/tokenization/verbatim-tokenizer.cc +++ b/icing/tokenization/verbatim-tokenizer.cc @@ -124,15 +124,14 @@ class VerbatimTokenIterator : public Tokenizer::Iterator { }; libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> -VerbatimTokenizer::Tokenize(std::string_view text, - LanguageSegmenter::AccessType) const { +VerbatimTokenizer::Tokenize(std::string_view text) const { return std::make_unique<VerbatimTokenIterator>(text); } libtextclassifier3::StatusOr<std::vector<Token>> VerbatimTokenizer::TokenizeAll( std::string_view text) const { - std::unique_ptr<Tokenizer::Iterator> iterator = - std::make_unique<VerbatimTokenIterator>(text); + ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator, + Tokenize(text)); std::vector<Token> tokens; while (iterator->Advance()) { std::vector<Token> batch = iterator->GetTokens(); diff --git a/icing/tokenization/verbatim-tokenizer.h b/icing/tokenization/verbatim-tokenizer.h index 
c3746af..8404cf1 100644 --- a/icing/tokenization/verbatim-tokenizer.h +++ b/icing/tokenization/verbatim-tokenizer.h @@ -20,7 +20,6 @@ #include <vector> #include "icing/text_classifier/lib3/utils/base/statusor.h" -#include "icing/tokenization/language-segmenter.h" #include "icing/tokenization/tokenizer.h" namespace icing { @@ -30,7 +29,7 @@ namespace lib { class VerbatimTokenizer : public Tokenizer { public: libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize( - std::string_view text, LanguageSegmenter::AccessType) const override; + std::string_view text) const override; libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll( std::string_view text) const override; diff --git a/icing/tokenization/verbatim-tokenizer_test.cc b/icing/tokenization/verbatim-tokenizer_test.cc index 5aeb343..bae69ff 100644 --- a/icing/tokenization/verbatim-tokenizer_test.cc +++ b/icing/tokenization/verbatim-tokenizer_test.cc @@ -22,7 +22,6 @@ #include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" #include "icing/tokenization/language-segmenter-factory.h" -#include "icing/tokenization/language-segmenter.h" #include "icing/tokenization/token.h" #include "icing/tokenization/tokenizer-factory.h" #include "icing/util/character-iterator.h" @@ -95,10 +94,7 @@ TEST_F(VerbatimTokenizerTest, NoTokensBeforeAdvancing) { language_segmenter_.get())); constexpr std::string_view kText = "Hello, world!"; - auto token_iterator = - verbatim_tokenizer - ->Tokenize(kText, LanguageSegmenter::AccessType::kForwardIterator) - .ValueOrDie(); + auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); // We should get no tokens if we get the token before advancing. 
EXPECT_THAT(token_iterator->GetTokens(), IsEmpty()); @@ -111,10 +107,7 @@ TEST_F(VerbatimTokenizerTest, ResetToTokenEndingBefore) { language_segmenter_.get())); constexpr std::string_view kText = "Hello, world!"; - auto token_iterator = - verbatim_tokenizer - ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator) - .ValueOrDie(); + auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); // Reset to beginning of verbatim of token. We provide an offset of 13 as it // is larger than the final index (12) of the verbatim token. @@ -141,10 +134,7 @@ TEST_F(VerbatimTokenizerTest, ResetToTokenStartingAfter) { language_segmenter_.get())); constexpr std::string_view kText = "Hello, world!"; - auto token_iterator = - verbatim_tokenizer - ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator) - .ValueOrDie(); + auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); // Get token without resetting EXPECT_TRUE(token_iterator->Advance()); @@ -169,10 +159,7 @@ TEST_F(VerbatimTokenizerTest, ResetToStart) { language_segmenter_.get())); constexpr std::string_view kText = "Hello, world!"; - auto token_iterator = - verbatim_tokenizer - ->Tokenize(kText, LanguageSegmenter::AccessType::kBidirectionalIterator) - .ValueOrDie(); + auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); // Get token without resetting EXPECT_TRUE(token_iterator->Advance()); @@ -192,10 +179,7 @@ TEST_F(VerbatimTokenizerTest, CalculateTokenStart) { language_segmenter_.get())); constexpr std::string_view kText = "Hello, world!"; - auto token_iterator = - verbatim_tokenizer - ->Tokenize(kText, LanguageSegmenter::AccessType::kForwardIterator) - .ValueOrDie(); + auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); ICING_ASSERT_OK_AND_ASSIGN(CharacterIterator start_character_iterator, token_iterator->CalculateTokenStart()); @@ -211,10 +195,7 @@ TEST_F(VerbatimTokenizerTest, CalculateTokenEnd) { 
language_segmenter_.get())); constexpr std::string_view kText = "Hello, world!"; - auto token_iterator = - verbatim_tokenizer - ->Tokenize(kText, LanguageSegmenter::AccessType::kForwardIterator) - .ValueOrDie(); + auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); ICING_ASSERT_OK_AND_ASSIGN(CharacterIterator end_character_iterator, token_iterator->CalculateTokenEndExclusive()); diff --git a/icing/util/document-validator.cc b/icing/util/document-validator.cc index ca15ee3..9d5fea7 100644 --- a/icing/util/document-validator.cc +++ b/icing/util/document-validator.cc @@ -151,15 +151,19 @@ libtextclassifier3::Status DocumentValidator::Validate( // fail, we don't need to validate the extra documents. if (property_config.data_type() == PropertyConfigProto::DataType::DOCUMENT) { - const std::string_view nested_type_expected = - property_config.schema_type(); + ICING_ASSIGN_OR_RETURN( + const std::unordered_set<SchemaTypeId>* nested_type_ids_expected, + schema_store_->GetSchemaTypeIdsWithChildren( + property_config.schema_type())); for (const DocumentProto& nested_document : property.document_values()) { - if (nested_type_expected.compare(nested_document.schema()) != 0) { + libtextclassifier3::StatusOr<SchemaTypeId> nested_document_type_id_or = + schema_store_->GetSchemaTypeId(nested_document.schema()); + if (!nested_document_type_id_or.ok() || + nested_type_ids_expected->count( + nested_document_type_id_or.ValueOrDie()) == 0) { return absl_ports::InvalidArgumentError(absl_ports::StrCat( - "Property '", property.name(), "' should have type '", - nested_type_expected, - "' but actual " - "value has type '", + "Property '", property.name(), "' should be type or subtype of '", + property_config.schema_type(), "' but actual value has type '", nested_document.schema(), "' for key: (", document.namespace_(), ", ", document.uri(), ").")); } diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc index 310494a..9d10b36 100644 
--- a/icing/util/document-validator_test.cc +++ b/icing/util/document-validator_test.cc @@ -35,13 +35,16 @@ namespace { using ::testing::HasSubstr; -// type and property names of EmailMessage +// type and property names of EmailMessage and EmailMessageWithNote constexpr char kTypeEmail[] = "EmailMessage"; +constexpr char kTypeEmailWithNote[] = "EmailMessageWithNote"; constexpr char kPropertySubject[] = "subject"; constexpr char kPropertyText[] = "text"; constexpr char kPropertyRecipients[] = "recipients"; +constexpr char kPropertyNote[] = "note"; // type and property names of Conversation constexpr char kTypeConversation[] = "Conversation"; +constexpr char kTypeConversationWithEmailNote[] = "ConversationWithEmailNote"; constexpr char kPropertyName[] = "name"; constexpr char kPropertyEmails[] = "emails"; // Other values @@ -72,6 +75,26 @@ class DocumentValidatorTest : public ::testing::Test { .SetCardinality(CARDINALITY_REPEATED))) .AddType( SchemaTypeConfigBuilder() + .SetType(kTypeEmailWithNote) + .AddParentType(kTypeEmail) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertySubject) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyText) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyRecipients) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyNote) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType( + SchemaTypeConfigBuilder() .SetType(kTypeConversation) .AddProperty(PropertyConfigBuilder() .SetName(kPropertyName) @@ -83,6 +106,19 @@ class DocumentValidatorTest : public ::testing::Test { .SetDataTypeDocument( kTypeEmail, /*index_nested_properties=*/true) .SetCardinality(CARDINALITY_REPEATED))) + .AddType( + SchemaTypeConfigBuilder() + .SetType(kTypeConversationWithEmailNote) + 
.AddProperty(PropertyConfigBuilder() + .SetName(kPropertyName) + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyEmails) + .SetDataTypeDocument( + kTypeEmailWithNote, + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) .Build(); schema_dir_ = GetTestTempDir() + "/schema_store"; @@ -90,13 +126,16 @@ class DocumentValidatorTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( schema_store_, SchemaStore::Create(&filesystem_, schema_dir_, &fake_clock_)); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); document_validator_ = std::make_unique<DocumentValidator>(schema_store_.get()); } - static DocumentBuilder SimpleEmailBuilder() { + DocumentBuilder SimpleEmailBuilder() { return DocumentBuilder() .SetKey(kDefaultNamespace, "email/1") .SetSchema(kTypeEmail) @@ -106,7 +145,18 @@ class DocumentValidatorTest : public ::testing::Test { kDefaultString); } - static DocumentBuilder SimpleConversationBuilder() { + DocumentBuilder SimpleEmailWithNoteBuilder() { + return DocumentBuilder() + .SetKey(kDefaultNamespace, "email_with_note/1") + .SetSchema(kTypeEmailWithNote) + .AddStringProperty(kPropertySubject, kDefaultString) + .AddStringProperty(kPropertyText, kDefaultString) + .AddStringProperty(kPropertyRecipients, kDefaultString, kDefaultString, + kDefaultString) + .AddStringProperty(kPropertyNote, kDefaultString); + } + + DocumentBuilder SimpleConversationBuilder() { return DocumentBuilder() .SetKey(kDefaultNamespace, "conversation/1") .SetSchema(kTypeConversation) @@ -299,10 +349,82 @@ TEST_F(DocumentValidatorTest, SimpleEmailBuilder().Build()) .Build(); - EXPECT_THAT(document_validator_->Validate(conversation), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, - HasSubstr("'emails' should have type 
'EmailMessage' but " - "actual value has type 'Conversation'"))); + EXPECT_THAT( + document_validator_->Validate(conversation), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("'emails' should be type or subtype of 'EmailMessage' " + "but actual value has type 'Conversation'"))); +} + +TEST_F(DocumentValidatorTest, ValidateNestedPropertyMatchSubtypeOk) { + DocumentProto conversation = + DocumentBuilder() + .SetKey(kDefaultNamespace, "conversation/1") + .SetSchema(kTypeConversation) + .AddStringProperty(kPropertyName, kDefaultString) + .AddDocumentProperty(kPropertyEmails, SimpleEmailBuilder().Build(), + // This is a subtype, which is ok. + SimpleEmailWithNoteBuilder().Build(), + SimpleEmailBuilder().Build()) + .Build(); + + EXPECT_THAT(document_validator_->Validate(conversation), IsOk()); +} + +TEST_F(DocumentValidatorTest, ValidateNestedPropertyNonexistentTypeInvalid) { + DocumentProto conversation = + DocumentBuilder() + .SetKey(kDefaultNamespace, "conversation/1") + .SetSchema(kTypeConversation) + .AddStringProperty(kPropertyName, kDefaultString) + .AddDocumentProperty( + kPropertyEmails, SimpleEmailBuilder().Build(), + // Nonexistent type is not allowed + DocumentBuilder() + .SetKey(kDefaultNamespace, "email_with_note/1") + .SetSchema("Nonexistent") + .Build(), + SimpleEmailBuilder().Build()) + .Build(); + + EXPECT_THAT( + document_validator_->Validate(conversation), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("'emails' should be type or subtype of 'EmailMessage' " + "but actual value has type 'Nonexistent'"))); +} + +TEST_F(DocumentValidatorTest, ValidateNestedPropertyMatchSuperTypeInvalid) { + DocumentProto conversation1 = + DocumentBuilder() + .SetKey(kDefaultNamespace, "conversation_with_email_note/1") + .SetSchema(kTypeConversationWithEmailNote) + .AddStringProperty(kPropertyName, kDefaultString) + .AddDocumentProperty(kPropertyEmails, + SimpleEmailWithNoteBuilder().Build(), + 
SimpleEmailWithNoteBuilder().Build(), + SimpleEmailWithNoteBuilder().Build()) + .Build(); + EXPECT_THAT(document_validator_->Validate(conversation1), IsOk()); + + DocumentProto conversation2 = + DocumentBuilder() + .SetKey(kDefaultNamespace, "conversation_with_email_note/2") + .SetSchema(kTypeConversationWithEmailNote) + .AddStringProperty(kPropertyName, kDefaultString) + .AddDocumentProperty(kPropertyEmails, + SimpleEmailWithNoteBuilder().Build(), + // This is a super type, which is not ok. + SimpleEmailBuilder().Build(), + SimpleEmailWithNoteBuilder().Build()) + .Build(); + EXPECT_THAT( + document_validator_->Validate(conversation2), + StatusIs( + libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr( + "'emails' should be type or subtype of 'EmailMessageWithNote' " + "but actual value has type 'EmailMessage'"))); } TEST_F(DocumentValidatorTest, ValidateNestedPropertyInvalid) { @@ -351,7 +473,10 @@ TEST_F(DocumentValidatorTest, HandleTypeConfigMapChangesOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, SchemaStore::Create(&filesystem_, custom_schema_dir, &fake_clock_)); - ASSERT_THAT(schema_store->SetSchema(email_schema), IsOk()); + ASSERT_THAT(schema_store->SetSchema( + email_schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); DocumentValidator document_validator(schema_store.get()); @@ -381,7 +506,11 @@ TEST_F(DocumentValidatorTest, HandleTypeConfigMapChangesOk) { // DocumentValidator should be able to handle the SchemaStore getting updated // separately - ASSERT_THAT(schema_store->SetSchema(email_and_conversation_schema), IsOk()); + ASSERT_THAT( + schema_store->SetSchema(email_and_conversation_schema, + /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); ICING_EXPECT_OK(document_validator.Validate(conversation)); } diff --git a/icing/util/tokenized-document.cc b/icing/util/tokenized-document.cc index 
004181e..19aaddf 100644 --- a/icing/util/tokenized-document.cc +++ b/icing/util/tokenized-document.cc @@ -45,10 +45,8 @@ libtextclassifier3::StatusOr<std::vector<TokenizedSection>> Tokenize( section.metadata.tokenizer, language_segmenter)); std::vector<std::string_view> token_sequence; for (std::string_view subcontent : section.content) { - ICING_ASSIGN_OR_RETURN( - std::unique_ptr<Tokenizer::Iterator> itr, - tokenizer->Tokenize(subcontent, - LanguageSegmenter::AccessType::kForwardIterator)); + ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> itr, + tokenizer->Tokenize(subcontent)); while (itr->Advance()) { std::vector<Token> batch_tokens = itr->GetTokens(); for (const Token& token : batch_tokens) { diff --git a/icing/util/tokenized-document_test.cc b/icing/util/tokenized-document_test.cc index c0b20bb..7c97776 100644 --- a/icing/util/tokenized-document_test.cc +++ b/icing/util/tokenized-document_test.cc @@ -44,9 +44,9 @@ namespace lib { namespace { +using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::ElementsAre; using ::testing::Eq; -using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::IsEmpty; using ::testing::SizeIs; @@ -168,7 +168,9 @@ class TokenizedDocumentTest : public ::testing::Test { JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); - ICING_ASSERT_OK(schema_store_->SetSchema(schema)); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false)); } void TearDown() override { diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto index db5dbed..18884c6 100644 --- a/proto/icing/proto/initialize.proto +++ b/proto/icing/proto/initialize.proto @@ -23,7 +23,7 @@ option java_package = "com.google.android.icing.proto"; option java_multiple_files = true; option objc_class_prefix = "ICNG"; -// Next tag: 8 +// Next tag: 9 message IcingSearchEngineOptions { // 
Directory to persist files for Icing. Required. // If Icing was previously initialized with this directory, it will reload @@ -82,6 +82,20 @@ message IcingSearchEngineOptions { // Optional. optional int32 compression_level = 7 [default = 3]; + // OPTIONAL: Whether to allow circular references between schema types for + // the schema definition. + // + // Even when set to true, circular references are still not allowed in the + // following cases: + // 1. All edges of a cycle have index_nested_properties=true + // 2. One of the types in the cycle has a joinable property, or depends on + // a type with a joinable property. + // This is because such a cycle would lead to an infinite number of + // indexed/joinable properties: + // + // The default value is false. + optional bool allow_circular_schema_definitions = 8; + reserved 2; } diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto index 04f655d..ca795cd 100644 --- a/proto/icing/proto/logging.proto +++ b/proto/icing/proto/logging.proto @@ -49,6 +49,9 @@ message InitializeStatsProto { // The document log is using legacy format. LEGACY_DOCUMENT_LOG_FORMAT = 5; + + // The current code version is different from existing data version. + VERSION_CHANGED = 6; } // Possible recovery causes for document store: @@ -114,7 +117,7 @@ message InitializeStatsProto { } // Stats of the top-level function IcingSearchEngine::Put(). -// Next tag: 7 +// Next tag: 10 message PutDocumentStatsProto { // Overall time used for the function call. optional int32 latency_ms = 1; @@ -122,8 +125,7 @@ message PutDocumentStatsProto { // Time used to store the document. optional int32 document_store_latency_ms = 2; - // Time used to index the document. It does not include the time to merge - // indices. + // Time used to index the document. optional int32 index_latency_ms = 3; // Time used to merge the indices. 
@@ -139,6 +141,16 @@ message PutDocumentStatsProto { reserved 2; } optional TokenizationStats tokenization_stats = 6; + + // Time used to index all indexable string terms in the document. It does not + // include the time to merge indices. + optional int32 term_index_latency_ms = 7; + + // Time used to index all indexable integers in the document. + optional int32 integer_index_latency_ms = 8; + + // Time used to index all qualified id join strings in the document. + optional int32 qualified_id_join_index_latency_ms = 9; } // Stats of the top-level function IcingSearchEngine::Search() and diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto index dc625fc..b972ece 100644 --- a/proto/icing/proto/schema.proto +++ b/proto/icing/proto/schema.proto @@ -60,11 +60,11 @@ message SchemaTypeConfigProto { // it will default to value == 0. optional int32 version = 5; - // An experimental field to make the type as a subtype of parent_type, which - // enables parent_type to be interpreted as its subtypes in the context of the - // Search APIs, including schema type filters and projections specified in + // An experimental field to make the type as a subtype of parent_types, which + // enables parent_types to be interpreted as its subtypes in the context of + // the Search APIs, including schema type filters and projections specified in // TypePropertyMask. - optional string parent_type = 6; + repeated string parent_types = 6; reserved 2, 3; } diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto index 8bdbf0c..e5ad269 100644 --- a/proto/icing/proto/search.proto +++ b/proto/icing/proto/search.proto @@ -98,7 +98,7 @@ message SearchSpecProto { // Client-supplied specifications on what to include/how to format the search // results. -// Next tag: 8 +// Next tag: 9 message ResultSpecProto { // The results will be returned in pages, and num_per_page specifies the // number of documents in one page. 
@@ -199,6 +199,10 @@ message ResultSpecProto { NAMESPACE_AND_SCHEMA_TYPE = 3; } optional ResultGroupingType result_group_type = 7; + + // The max # of child documents will be attached and returned in the result + // for each parent. It is only used for join API. + optional int32 max_joined_children_per_parent_to_return = 8; } // The representation of a single match within a DocumentProto property. @@ -499,7 +503,10 @@ message JoinSpecProto { optional string child_property_expression = 3; // The max number of child documents to join to a parent document. - optional int32 max_joined_child_count = 4; + // DEPRECATED: use ResultSpecProto.max_joined_children_per_parent_to_return to + // control the number of children that are returned. There is no supported + // control for the number of children being scored at this time. + optional int32 max_joined_child_count = 4 [deprecated = true]; // The strategy by which to score the aggregation of child documents. For // example, you might want to know which entity document has the most actions diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt index 0d6bfb4..ae59ff7 100644 --- a/synced_AOSP_CL_number.txt +++ b/synced_AOSP_CL_number.txt @@ -1 +1 @@ -set(synced_AOSP_CL_number=524885330) +set(synced_AOSP_CL_number=531296607) |