diff options
author | Grace Zhao <gracezrx@google.com> | 2024-01-05 15:33:48 -0800 |
---|---|---|
committer | Grace Zhao <gracezrx@google.com> | 2024-01-05 23:44:48 +0000 |
commit | 030b99d2648aea03e5c2e3809a13fdfacdccde11 (patch) | |
tree | 0db961d8ad94bbf346f5b5a701d6fa3fb86fe7e1 | |
parent | a3e306de05f977e27ae8e9ce840b54fa5b838366 (diff) | |
download | icing-030b99d2648aea03e5c2e3809a13fdfacdccde11.tar.gz |
Update Icing from upstream.
Descriptions:
========================================================================
[Trunk Stable Flags and Files Rebuild #1] Implement v2 version file in version-util
========================================================================
[Trunk Stable Flags and Files Rebuild #2] Integrate v2 version file with IcingSearchEngine
========================================================================
Remove dead code in index initialization for HasPropertyOperator after introducing v2 version file
========================================================================
[Icing version 4] Bump Icing kVersion to 4 for Android V
========================================================================
[Icing][Expand QueryStats][4/x] Add lite index hit buffer info into QueryStats
========================================================================
[Icing][Expand QueryStats][5/x] Add query processor advanced query
components latencies into QueryStats::SearchStats
========================================================================
Add instructions to error message for AppSearch advanced query features backward compatibility
========================================================================
NO_IFTTT="Path is only valid in G3."
Bug: 314816301
Bug: 309826655
Bug: 305098009
Change-Id: I6eddda6dee8362aed5c56844b69dd159a715aaec
33 files changed, 1631 insertions, 357 deletions
diff --git a/icing/file/version-util.cc b/icing/file/version-util.cc index dd233e0..e750b0c 100644 --- a/icing/file/version-util.cc +++ b/icing/file/version-util.cc @@ -15,36 +15,47 @@ #include "icing/file/version-util.h" #include <cstdint> +#include <memory> #include <string> +#include <string_view> +#include <unordered_set> #include <utility> #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" +#include "icing/file/file-backed-proto.h" #include "icing/file/filesystem.h" #include "icing/index/index.h" +#include "icing/proto/initialize.pb.h" +#include "icing/util/status-macros.h" namespace icing { namespace lib { namespace version_util { -libtextclassifier3::StatusOr<VersionInfo> ReadVersion( - const Filesystem& filesystem, const std::string& version_file_path, +namespace { + +libtextclassifier3::StatusOr<VersionInfo> ReadV1VersionInfo( + const Filesystem& filesystem, const std::string& version_file_dir, const std::string& index_base_dir) { // 1. Read the version info. + const std::string v1_version_filepath = + MakeVersionFilePath(version_file_dir, kVersionFilenameV1); VersionInfo existing_version_info(-1, -1); - if (filesystem.FileExists(version_file_path.c_str()) && - !filesystem.PRead(version_file_path.c_str(), &existing_version_info, + if (filesystem.FileExists(v1_version_filepath.c_str()) && + !filesystem.PRead(v1_version_filepath.c_str(), &existing_version_info, sizeof(VersionInfo), /*offset=*/0)) { - return absl_ports::InternalError("Fail to read version"); + return absl_ports::InternalError("Failed to read v1 version file"); } // 2. Check the Index magic to see if we're actually on version 0. - libtextclassifier3::StatusOr<int> existing_flash_index_magic_or = + libtextclassifier3::StatusOr<int> existing_flash_index_magic = Index::ReadFlashIndexMagic(&filesystem, index_base_dir); - if (!existing_flash_index_magic_or.ok()) { - if (absl_ports::IsNotFound(existing_flash_index_magic_or.status())) { + if (!existing_flash_index_magic.ok()) { + if (absl_ports::IsNotFound(existing_flash_index_magic.status())) { // Flash index magic doesn't exist. In this case, we're unable to // determine the version change state correctly (regardless of the // existence of the version file), so invalidate VersionInfo by setting @@ -53,9 +64,9 @@ libtextclassifier3::StatusOr<VersionInfo> ReadVersion( return existing_version_info; } // Real error. - return std::move(existing_flash_index_magic_or).status(); + return std::move(existing_flash_index_magic).status(); } - if (existing_flash_index_magic_or.ValueOrDie() == + if (existing_flash_index_magic.ValueOrDie() == kVersionZeroFlashIndexMagic) { existing_version_info.version = 0; if (existing_version_info.max_version == -1) { @@ -66,15 +77,124 @@ libtextclassifier3::StatusOr<VersionInfo> ReadVersion( return existing_version_info; } -libtextclassifier3::Status WriteVersion(const Filesystem& filesystem, - const std::string& version_file_path, - const VersionInfo& version_info) { - ScopedFd scoped_fd(filesystem.OpenForWrite(version_file_path.c_str())); +libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadV2VersionInfo( + const Filesystem& filesystem, const std::string& version_file_dir) { + // Read the v2 version file. V2 version file stores the + // IcingSearchEngineVersionProto as a file-backed proto. + const std::string v2_version_filepath = + MakeVersionFilePath(version_file_dir, kVersionFilenameV2); + FileBackedProto<IcingSearchEngineVersionProto> v2_version_file( + filesystem, v2_version_filepath); + ICING_ASSIGN_OR_RETURN(const IcingSearchEngineVersionProto* v2_version_proto, + v2_version_file.Read()); + + return *v2_version_proto; +} + +} // namespace + +libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadVersion( + const Filesystem& filesystem, const std::string& version_file_dir, + const std::string& index_base_dir) { + // 1. Read the v1 version file + ICING_ASSIGN_OR_RETURN( + VersionInfo v1_version_info, + ReadV1VersionInfo(filesystem, version_file_dir, index_base_dir)); + if (!v1_version_info.IsValid()) { + // This happens if IcingLib's state is invalid (e.g. flash index header file + // is missing). Return the invalid version numbers in this case. + IcingSearchEngineVersionProto version_proto; + version_proto.set_version(v1_version_info.version); + version_proto.set_max_version(v1_version_info.max_version); + return version_proto; + } + + // 2. Read the v2 version file + auto v2_version_proto = ReadV2VersionInfo(filesystem, version_file_dir); + if (!v2_version_proto.ok()) { + if (!absl_ports::IsNotFound(v2_version_proto.status())) { + // Real error. + return std::move(v2_version_proto).status(); + } + // The v2 version file has not been written + IcingSearchEngineVersionProto version_proto; + if (v1_version_info.version < kFirstV2Version) { + // There are two scenarios for this case: + // 1. It's the first time that we're upgrading from a lower version to a + // version >= kFirstV2Version. + // - It's expected that the v2 version file has not been written yet in + // this case and we return the v1 version numbers instead. + // 2. We're rolling forward from a version < kFirstV2Version, after + // rolling back from a previous version >= kFirstV2Version, and for + // some unknown reason we lost the v2 version file in the previous + // version. + // - e.g. version #4 -> version #1 -> version #4, but we lost the v2 + // file during version #1. + // - This is a rollforward case, but it's still fine to return the v1 + // version number here as ShouldRebuildDerivedFiles can handle + // rollforwards correctly. + version_proto.set_version(v1_version_info.version); + version_proto.set_max_version(v1_version_info.max_version); + } else { + // Something weird has happened. During last initialization we were + // already on a version >= kFirstV2Version, so the v2 version file + // should have been written. + // Return an invalid version number in this case and trigger rebuilding + // everything. + version_proto.set_version(-1); + version_proto.set_max_version(v1_version_info.max_version); + } + return version_proto; + } + + // 3. Check if versions match. If not, it means that we're rolling forward + // from a version < kFirstV2Version. In order to trigger rebuilding + // everything, we return an invalid version number in this case. + IcingSearchEngineVersionProto v2_version_proto_value = + std::move(v2_version_proto).ValueOrDie(); + if (v1_version_info.version != v2_version_proto_value.version()) { + v2_version_proto_value.set_version(-1); + v2_version_proto_value.mutable_enabled_features()->Clear(); + } + + return v2_version_proto_value; +} + +libtextclassifier3::Status WriteV1Version(const Filesystem& filesystem, + const std::string& version_file_dir, + const VersionInfo& version_info) { + ScopedFd scoped_fd(filesystem.OpenForWrite( + MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str())); if (!scoped_fd.is_valid() || !filesystem.PWrite(scoped_fd.get(), /*offset=*/0, &version_info, sizeof(VersionInfo)) || !filesystem.DataSync(scoped_fd.get())) { - return absl_ports::InternalError("Fail to write version"); + return absl_ports::InternalError("Failed to write v1 version file"); + } + return libtextclassifier3::Status::OK; +} + +libtextclassifier3::Status WriteV2Version( + const Filesystem& filesystem, const std::string& version_file_dir, + std::unique_ptr<IcingSearchEngineVersionProto> version_proto) { + FileBackedProto<IcingSearchEngineVersionProto> v2_version_file( + filesystem, MakeVersionFilePath(version_file_dir, kVersionFilenameV2)); + libtextclassifier3::Status v2_write_status = + v2_version_file.Write(std::move(version_proto)); + if (!v2_write_status.ok()) { + return absl_ports::InternalError(absl_ports::StrCat( + "Failed to write v2 version file: ", v2_write_status.error_message())); + } + return libtextclassifier3::Status::OK; +} + +libtextclassifier3::Status DiscardVersionFiles( + const Filesystem& filesystem, std::string_view version_file_dir) { + if (!filesystem.DeleteFile( + MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()) || + !filesystem.DeleteFile( + MakeVersionFilePath(version_file_dir, kVersionFilenameV2).c_str())) { + return absl_ports::InternalError("Failed to discard version files"); } return libtextclassifier3::Status::OK; } @@ -102,6 +222,65 @@ StateChange GetVersionStateChange(const VersionInfo& existing_version_info, } } +DerivedFilesRebuildResult CalculateRequiredDerivedFilesRebuild( + const IcingSearchEngineVersionProto& prev_version_proto, + const IcingSearchEngineVersionProto& curr_version_proto) { + // 1. Do version check using version and max_version numbers + if (ShouldRebuildDerivedFiles(GetVersionInfoFromProto(prev_version_proto), + curr_version_proto.version())) { + return DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/true, + /*needs_schema_store_derived_files_rebuild=*/true, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/true, + /*needs_qualified_id_join_index_rebuild=*/true); + } + + // 2. Compare the previous enabled features with the current enabled features + // and rebuild if there are differences. + std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType> + prev_features; + for (const auto& feature : prev_version_proto.enabled_features()) { + prev_features.insert(feature.feature_type()); + } + std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType> + curr_features; + for (const auto& feature : curr_version_proto.enabled_features()) { + curr_features.insert(feature.feature_type()); + } + DerivedFilesRebuildResult result; + for (const auto& prev_feature : prev_features) { + // If there is an UNKNOWN feature in the previous feature set (note that we + // never use UNKNOWN when writing the version proto), it means that: + // - The previous version proto contains a feature enum that is only defined + // in a newer version. + // - We've now rolled back to an old version that doesn't understand this + // new enum value, and proto serialization defaults it to 0 (UNKNOWN). + // - In this case we need to rebuild everything. + if (prev_feature == IcingSearchEngineFeatureInfoProto::UNKNOWN) { + return DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/true, + /*needs_schema_store_derived_files_rebuild=*/true, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/true, + /*needs_qualified_id_join_index_rebuild=*/true); + } + if (curr_features.find(prev_feature) == curr_features.end()) { + DerivedFilesRebuildResult required_rebuilds = + GetFeatureDerivedFilesRebuildResult(prev_feature); + result.CombineWithOtherRebuildResultOr(required_rebuilds); + } + } + for (const auto& curr_feature : curr_features) { + if (prev_features.find(curr_feature) == prev_features.end()) { + DerivedFilesRebuildResult required_rebuilds = + GetFeatureDerivedFilesRebuildResult(curr_feature); + result.CombineWithOtherRebuildResultOr(required_rebuilds); + } + } + return result; +} + bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info, int32_t curr_version) { StateChange state_change = @@ -135,6 +314,10 @@ bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info, // version 2 -> version 3 upgrade, no need to rebuild break; } + case 3: { + // version 3 -> version 4 upgrade, no need to rebuild + break; + } default: // This should not happen. Rebuild anyway if unsure. should_rebuild |= true; @@ -144,6 +327,56 @@ bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info, return should_rebuild; } +DerivedFilesRebuildResult GetFeatureDerivedFilesRebuildResult( + IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) { + switch (feature) { + case IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR: { + return DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/false, + /*needs_schema_store_derived_files_rebuild=*/false, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/false, + /*needs_qualified_id_join_index_rebuild=*/false); + } + case IcingSearchEngineFeatureInfoProto::UNKNOWN: + return DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/true, + /*needs_schema_store_derived_files_rebuild=*/true, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/true, + /*needs_qualified_id_join_index_rebuild=*/true); + } +} + +IcingSearchEngineFeatureInfoProto GetFeatureInfoProto( + IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) { + IcingSearchEngineFeatureInfoProto info; + info.set_feature_type(feature); + + DerivedFilesRebuildResult result = + GetFeatureDerivedFilesRebuildResult(feature); + info.set_needs_document_store_rebuild( + result.needs_document_store_derived_files_rebuild); + info.set_needs_schema_store_rebuild( + result.needs_schema_store_derived_files_rebuild); + info.set_needs_term_index_rebuild(result.needs_term_index_rebuild); + info.set_needs_integer_index_rebuild(result.needs_integer_index_rebuild); + info.set_needs_qualified_id_join_index_rebuild( + result.needs_qualified_id_join_index_rebuild); + + return info; +} + +void AddEnabledFeatures(const IcingSearchEngineOptions& options, + IcingSearchEngineVersionProto* version_proto) { + auto* enabled_features = version_proto->mutable_enabled_features(); + // HasPropertyOperator feature + if (options.build_property_existence_metadata_hits()) { + enabled_features->Add(GetFeatureInfoProto( + IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR)); + } +} + } // namespace version_util } // namespace lib diff --git a/icing/file/version-util.h b/icing/file/version-util.h index b2d51df..feadaf6 100644 --- a/icing/file/version-util.h +++ b/icing/file/version-util.h @@ -16,11 +16,15 @@ #define ICING_FILE_VERSION_UTIL_H_ #include <cstdint> +#include <memory> #include <string> +#include <string_view> #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/str_cat.h" #include "icing/file/filesystem.h" +#include "icing/proto/initialize.pb.h" namespace icing { namespace lib { @@ -32,16 +36,26 @@ namespace version_util { // - Version 2: M-2023-09, M-2023-11, M-2024-01. Schema is compatible with v1. // (There were no M-2023-10, M-2023-12). // - Version 3: M-2024-02. Schema is compatible with v1 and v2. +// - Version 4: Android V base. Schema is compatible with v1, v2 and v3. // +// TODO(b/314816301): Bump kVersion to 4 for Android V rollout with v2 version +// detection // LINT.IfChange(kVersion) -inline static constexpr int32_t kVersion = 3; +inline static constexpr int32_t kVersion = 4; // LINT.ThenChange(//depot/google3/icing/schema/schema-store.cc:min_overlay_version_compatibility) inline static constexpr int32_t kVersionOne = 1; inline static constexpr int32_t kVersionTwo = 2; inline static constexpr int32_t kVersionThree = 3; +inline static constexpr int32_t kVersionFour = 4; + +// Version at which v2 version file is introduced. +inline static constexpr int32_t kFirstV2Version = kVersionFour; inline static constexpr int kVersionZeroFlashIndexMagic = 0x6dfba6ae; +inline static constexpr std::string_view kVersionFilenameV1 = "version"; +inline static constexpr std::string_view kVersionFilenameV2 = "version2"; + struct VersionInfo { int32_t version; int32_t max_version; @@ -67,28 +81,144 @@ enum class StateChange { kVersionZeroRollForward, }; -// Helper method to read version info (using version file and flash index header -// magic) from the existing data. If the state is invalid (e.g. flash index -// header file is missing), then return an invalid VersionInfo. +// Contains information about which derived files need to be rebuild. +// +// These flags only reflect whether each component should be rebuilt, but do not +// handle any dependencies. The caller should handle the dependencies by +// themselves. +// e.g. - qualified id join index depends on document store derived files, but +// it's possible to have needs_document_store_derived_files_rebuild = +// true and needs_qualified_id_join_index_rebuild = false. +// - The caller should know that join index should also be rebuilt in this +// case even though needs_qualified_id_join_index_rebuild = false. +struct DerivedFilesRebuildResult { + bool needs_document_store_derived_files_rebuild = false; + bool needs_schema_store_derived_files_rebuild = false; + bool needs_term_index_rebuild = false; + bool needs_integer_index_rebuild = false; + bool needs_qualified_id_join_index_rebuild = false; + + DerivedFilesRebuildResult() = default; + + explicit DerivedFilesRebuildResult( + bool needs_document_store_derived_files_rebuild_in, + bool needs_schema_store_derived_files_rebuild_in, + bool needs_term_index_rebuild_in, bool needs_integer_index_rebuild_in, + bool needs_qualified_id_join_index_rebuild_in) + : needs_document_store_derived_files_rebuild( + needs_document_store_derived_files_rebuild_in), + needs_schema_store_derived_files_rebuild( + needs_schema_store_derived_files_rebuild_in), + needs_term_index_rebuild(needs_term_index_rebuild_in), + needs_integer_index_rebuild(needs_integer_index_rebuild_in), + needs_qualified_id_join_index_rebuild( + needs_qualified_id_join_index_rebuild_in) {} + + bool IsRebuildNeeded() const { + return needs_document_store_derived_files_rebuild || + needs_schema_store_derived_files_rebuild || + needs_term_index_rebuild || needs_integer_index_rebuild || + needs_qualified_id_join_index_rebuild; + } + + bool operator==(const DerivedFilesRebuildResult& other) const { + return needs_document_store_derived_files_rebuild == + other.needs_document_store_derived_files_rebuild && + needs_schema_store_derived_files_rebuild == + other.needs_schema_store_derived_files_rebuild && + needs_term_index_rebuild == other.needs_term_index_rebuild && + needs_integer_index_rebuild == other.needs_integer_index_rebuild && + needs_qualified_id_join_index_rebuild == + other.needs_qualified_id_join_index_rebuild; + } + + void CombineWithOtherRebuildResultOr(const DerivedFilesRebuildResult& other) { + needs_document_store_derived_files_rebuild = + needs_document_store_derived_files_rebuild || + other.needs_document_store_derived_files_rebuild; + needs_schema_store_derived_files_rebuild = + needs_schema_store_derived_files_rebuild || + other.needs_schema_store_derived_files_rebuild; + needs_term_index_rebuild = + needs_term_index_rebuild || other.needs_term_index_rebuild; + needs_integer_index_rebuild = + needs_integer_index_rebuild || other.needs_integer_index_rebuild; + needs_qualified_id_join_index_rebuild = + needs_qualified_id_join_index_rebuild || + other.needs_qualified_id_join_index_rebuild; + } +}; + +// There are two icing version files: +// 1. V1 version file contains version and max_version info of the existing +// data. +// 2. V2 version file writes the version information using +// FileBackedProto<IcingSearchEngineVersionProto>. This contains information +// about the version's enabled trunk stable features in addition to the +// version numbers written for V1. +// +// Both version files must be written to maintain backwards compatibility. +inline std::string MakeVersionFilePath(std::string_view version_file_dir, + std::string_view version_file_name) { + return absl_ports::StrCat(version_file_dir, "/", version_file_name); +} + +// Returns a VersionInfo from a given IcingSearchEngineVersionProto. +inline VersionInfo GetVersionInfoFromProto( + const IcingSearchEngineVersionProto& version_proto) { + return VersionInfo(version_proto.version(), version_proto.max_version()); +} + + +// Reads the IcingSearchEngineVersionProto from the version files of the +// existing data. +// +// This method reads both the v1 and v2 version files, and returns the v1 +// version numbers in the absence of the v2 version file. If there is a mismatch +// between the v1 and v2 version numbers, or if the state is invalid (e.g. flash +// index header file is missing), then an invalid VersionInfo is returned. // // RETURNS: -// - Existing data's VersionInfo on success +// - Existing data's IcingSearchEngineVersionProto on success // - INTERNAL_ERROR on I/O errors -libtextclassifier3::StatusOr<VersionInfo> ReadVersion( - const Filesystem& filesystem, const std::string& version_file_path, +libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadVersion( + const Filesystem& filesystem, const std::string& version_file_dir, const std::string& index_base_dir); -// Helper method to write version file. +// Writes the v1 version file. V1 version file is written for all versions and +// contains only Icing's VersionInfo (version number and max_version) // // RETURNS: // - OK on success // - INTERNAL_ERROR on I/O errors -libtextclassifier3::Status WriteVersion(const Filesystem& filesystem, - const std::string& version_file_path, - const VersionInfo& version_info); +libtextclassifier3::Status WriteV1Version(const Filesystem& filesystem, + const std::string& version_file_dir, + const VersionInfo& version_info); -// Helper method to determine the change state between the existing data version -// and the current code version. +// Writes the v2 version file. V2 version file writes the version information +// using FileBackedProto<IcingSearchEngineVersionProto>. +// +// REQUIRES: version_proto.version >= kFirstV2Version. We implement v2 version +// checking in kFirstV2Version, so callers will always use a version # greater +// than this. +// +// RETURNS: +// - OK on success +// - INTERNAL_ERROR on I/O errors +libtextclassifier3::Status WriteV2Version( + const Filesystem& filesystem, const std::string& version_file_dir, + std::unique_ptr<IcingSearchEngineVersionProto> version_proto); + +// Deletes Icing's version files from version_file_dir. +// +// Returns: +// - OK on success +// - INTERNAL_ERROR on I/O error +libtextclassifier3::Status DiscardVersionFiles( + const Filesystem& filesystem, std::string_view version_file_dir); + +// Determines the change state between the existing data version and the current +// code version. // // REQUIRES: curr_version > 0. We implement version checking in version 1, so // the callers (except unit tests) will always use a version # greater than 0. @@ -97,7 +227,19 @@ libtextclassifier3::Status WriteVersion(const Filesystem& filesystem, StateChange GetVersionStateChange(const VersionInfo& existing_version_info, int32_t curr_version = kVersion); -// Helper method to determine whether Icing should rebuild all derived files. +// Determines the derived files that need to be rebuilt between Icing's existing +// data based on previous data version and enabled features, and the current +// code version and enabled features. +// +// REQUIRES: curr_version >= kFirstV2Version. We implement v2 version checking +// in kFirstV2Version, so callers will always use a version # greater than this. +// +// RETURNS: DerivedFilesRebuildResult +DerivedFilesRebuildResult CalculateRequiredDerivedFilesRebuild( + const IcingSearchEngineVersionProto& prev_version_proto, + const IcingSearchEngineVersionProto& curr_version_proto); + +// Determines whether Icing should rebuild all derived files. // Sometimes it is not required to rebuild derived files when // roll-forward/upgrading. This function "encodes" upgrade paths and checks if // the roll-forward/upgrading requires derived files to be rebuilt or not. @@ -107,6 +249,24 @@ StateChange GetVersionStateChange(const VersionInfo& existing_version_info, bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info, int32_t curr_version = kVersion); +// Returns the derived files rebuilds required for a given feature. +DerivedFilesRebuildResult GetFeatureDerivedFilesRebuildResult( + IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature); + +// Constructs the IcingSearchEngineFeatureInfoProto for a given feature. +IcingSearchEngineFeatureInfoProto GetFeatureInfoProto( + IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature); + +// Populates the enabled_features field for an IcingSearchEngineFeatureInfoProto +// based on icing's initialization flag options. +// +// All enabled features are converted into an IcingSearchEngineFeatureInfoProto +// and returned in IcingSearchEngineVersionProto::enabled_features. A conversion +// should be added for each trunk stable feature flag defined in +// IcingSearchEngineOptions. +void AddEnabledFeatures(const IcingSearchEngineOptions& options, + IcingSearchEngineVersionProto* version_proto); + } // namespace version_util } // namespace lib diff --git a/icing/file/version-util_test.cc b/icing/file/version-util_test.cc index 9dedb1d..f2922e2 100644 --- a/icing/file/version-util_test.cc +++ b/icing/file/version-util_test.cc @@ -14,14 +14,19 @@ #include "icing/file/version-util.h" +#include <cstdint> +#include <memory> #include <optional> #include <string> +#include <unordered_set> #include <utility> #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/file/filesystem.h" #include "icing/file/posting_list/flash-index-storage-header.h" +#include "icing/portable/equals-proto.h" +#include "icing/proto/initialize.pb.h" #include "icing/testing/common-matchers.h" #include "icing/testing/tmp-directory.h" @@ -32,19 +37,38 @@ namespace version_util { namespace { using ::testing::Eq; +using ::testing::IsEmpty; using ::testing::IsFalse; using ::testing::IsTrue; +IcingSearchEngineVersionProto MakeTestVersionProto( + const VersionInfo& version_info, + const std::unordered_set< + IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>& features_set) { + IcingSearchEngineVersionProto version_proto; + version_proto.set_version(version_info.version); + version_proto.set_max_version(version_info.max_version); + + auto* enabled_features = version_proto.mutable_enabled_features(); + for (const auto& feature : features_set) { + enabled_features->Add(GetFeatureInfoProto(feature)); + } + return version_proto; +} + struct VersionUtilReadVersionTestParam { - std::optional<VersionInfo> existing_version_info; + std::optional<VersionInfo> existing_v1_version_info; + std::optional<VersionInfo> existing_v2_version_info; std::optional<int> existing_flash_index_magic; VersionInfo expected_version_info; explicit VersionUtilReadVersionTestParam( - std::optional<VersionInfo> existing_version_info_in, + std::optional<VersionInfo> existing_v1_version_info_in, + std::optional<VersionInfo> existing_v2_version_info_in, std::optional<int> existing_flash_index_magic_in, VersionInfo expected_version_info_in) - : existing_version_info(std::move(existing_version_info_in)), + : existing_v1_version_info(std::move(existing_v1_version_info_in)), + existing_v2_version_info(std::move(existing_v2_version_info_in)), existing_flash_index_magic(std::move(existing_flash_index_magic_in)), expected_version_info(std::move(expected_version_info_in)) {} }; @@ -54,7 +78,6 @@ class VersionUtilReadVersionTest protected: void SetUp() override { base_dir_ = GetTestTempDir() + "/version_util_test"; - version_file_path_ = base_dir_ + "/version"; index_path_ = base_dir_ + "/index"; ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(base_dir_.c_str())); @@ -68,19 +91,28 @@ class VersionUtilReadVersionTest Filesystem filesystem_; std::string base_dir_; - std::string version_file_path_; std::string index_path_; }; TEST_P(VersionUtilReadVersionTest, ReadVersion) { const VersionUtilReadVersionTestParam& param = GetParam(); + IcingSearchEngineVersionProto dummy_version_proto; - // Prepare version file and flash index file. - if (param.existing_version_info.has_value()) { - ICING_ASSERT_OK(WriteVersion(filesystem_, version_file_path_, - param.existing_version_info.value())); + if (param.existing_v1_version_info.has_value()) { + ICING_ASSERT_OK(WriteV1Version(filesystem_, base_dir_, + param.existing_v1_version_info.value())); + } + if (param.existing_v2_version_info.has_value()) { + dummy_version_proto = MakeTestVersionProto( + param.existing_v2_version_info.value(), + {IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR, + IcingSearchEngineFeatureInfoProto::UNKNOWN}); + ICING_ASSERT_OK(WriteV2Version( + filesystem_, base_dir_, + std::make_unique<IcingSearchEngineVersionProto>(dummy_version_proto))); } + // Prepare flash index file. if (param.existing_flash_index_magic.has_value()) { HeaderBlock header_block(&filesystem_, /*block_size=*/4096); header_block.header()->magic = param.existing_flash_index_magic.value(); @@ -94,10 +126,22 @@ TEST_P(VersionUtilReadVersionTest, ReadVersion) { ASSERT_TRUE(header_block.Write(sfd.get())); } - ICING_ASSERT_OK_AND_ASSIGN( - VersionInfo version_info, - ReadVersion(filesystem_, version_file_path_, index_path_)); - EXPECT_THAT(version_info, Eq(param.expected_version_info)); + ICING_ASSERT_OK_AND_ASSIGN(IcingSearchEngineVersionProto version_proto, + ReadVersion(filesystem_, base_dir_, index_path_)); + if (param.existing_v2_version_info.has_value() && + param.expected_version_info.version == + param.existing_v2_version_info.value().version) { + EXPECT_THAT(version_proto, + portable_equals_proto::EqualsProto(dummy_version_proto)); + } else { + // We're returning the version from v1 version file, or an invalid version. + // version_proto.enabled_features should be empty in this case. + EXPECT_THAT(version_proto.version(), + Eq(param.expected_version_info.version)); + EXPECT_THAT(version_proto.max_version(), + Eq(param.expected_version_info.max_version)); + EXPECT_THAT(version_proto.enabled_features(), IsEmpty()); + } } INSTANTIATE_TEST_SUITE_P( @@ -107,7 +151,8 @@ INSTANTIATE_TEST_SUITE_P( // - Flash index doesn't exist // - Result: version -1, max_version -1 (invalid) VersionUtilReadVersionTestParam( - /*existing_version_info_in=*/std::nullopt, + /*existing_v1_version_info_in=*/std::nullopt, + /*existing_v2_version_info_in=*/std::nullopt, /*existing_flash_index_magic_in=*/std::nullopt, /*expected_version_info_in=*/ VersionInfo(/*version_in=*/-1, /*max_version=*/-1)), @@ -116,7 +161,8 @@ INSTANTIATE_TEST_SUITE_P( // - Flash index exists with version 0 magic // - Result: version 0, max_version 0 VersionUtilReadVersionTestParam( - /*existing_version_info_in=*/std::nullopt, + /*existing_v1_version_info_in=*/std::nullopt, + /*existing_v2_version_info_in=*/std::nullopt, /*existing_flash_index_magic_in=*/ std::make_optional<int>(kVersionZeroFlashIndexMagic), /*expected_version_info_in=*/ @@ -126,65 +172,157 @@ INSTANTIATE_TEST_SUITE_P( // - Flash index exists with non version 0 magic // - Result: version -1, max_version -1 (invalid) VersionUtilReadVersionTestParam( - /*existing_version_info_in=*/std::nullopt, + /*existing_v1_version_info_in=*/std::nullopt, + /*existing_v2_version_info_in=*/std::nullopt, /*existing_flash_index_magic_in=*/ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1), /*expected_version_info_in=*/ VersionInfo(/*version_in=*/-1, /*max_version=*/-1)), - // - Version file exists + // - Version file v1 exists // - Flash index doesn't exist // - Result: version -1, max_version 1 (invalid) VersionUtilReadVersionTestParam( - /*existing_version_info_in=*/std::make_optional<VersionInfo>( + /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>( /*version_in=*/1, /*max_version=*/1), + /*existing_v2_version_info_in=*/std::nullopt, /*existing_flash_index_magic_in=*/std::nullopt, /*expected_version_info_in=*/ VersionInfo(/*version_in=*/-1, /*max_version=*/1)), - // - Version file exists: version 1, max_version 1 + // - Version file v1 exists: version 1, max_version 1 // - Flash index exists with version 0 magic // - Result: version 0, max_version 1 VersionUtilReadVersionTestParam( - /*existing_version_info_in=*/std::make_optional<VersionInfo>( + /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>( /*version_in=*/1, /*max_version=*/1), + /*existing_v2_version_info_in=*/std::nullopt, /*existing_flash_index_magic_in=*/ std::make_optional<int>(kVersionZeroFlashIndexMagic), /*expected_version_info_in=*/ VersionInfo(/*version_in=*/0, /*max_version=*/1)), - // - Version file exists: version 2, max_version 3 + // - Version file v1 exists: version 2, max_version 3 // - Flash index exists with version 0 magic // - Result: version 0, max_version 3 VersionUtilReadVersionTestParam( - /*existing_version_info_in=*/std::make_optional<VersionInfo>( + /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>( /*version_in=*/2, /*max_version=*/3), + /*existing_v2_version_info_in=*/std::nullopt, /*existing_flash_index_magic_in=*/ std::make_optional<int>(kVersionZeroFlashIndexMagic), /*expected_version_info_in=*/ VersionInfo(/*version_in=*/0, /*max_version=*/3)), - // - Version file exists: version 1, max_version 1 + // - Version file v1 exists: version 1, max_version 1 // - Flash index exists with non version 0 magic // - Result: version 1, max_version 1 VersionUtilReadVersionTestParam( - /*existing_version_info_in=*/std::make_optional<VersionInfo>( + /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>( /*version_in=*/1, /*max_version=*/1), + /*existing_v2_version_info_in=*/std::nullopt, /*existing_flash_index_magic_in=*/ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1), /*expected_version_info_in=*/ VersionInfo(/*version_in=*/1, /*max_version=*/1)), - // - Version file exists: version 2, max_version 3 + // - Version file v1 exists: version 2, max_version 3 // - Flash index exists with non version 0 magic // - Result: version 2, max_version 3 VersionUtilReadVersionTestParam( - /*existing_version_info_in=*/std::make_optional<VersionInfo>( + /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>( /*version_in=*/2, /*max_version=*/3), + /*existing_v2_version_info_in=*/std::nullopt, + /*existing_flash_index_magic_in=*/ + std::make_optional<int>(kVersionZeroFlashIndexMagic + 1), + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/2, /*max_version=*/3)), + + // - Version file v1 exists: version 2, max_version 4 + // - Version file v2 exists: version 4, max_version 4 + // - Flash index exists with non version 0 magic + // - Result: version -1, max_version 4 + VersionUtilReadVersionTestParam( + /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>( + /*version_in=*/2, /*max_version=*/4), + /*existing_v2_version_info_in=*/ + std::make_optional<VersionInfo>( + /*version_in=*/4, /*max_version=*/4), + /*existing_flash_index_magic_in=*/ + std::make_optional<int>(kVersionZeroFlashIndexMagic + 1), + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/-1, /*max_version=*/4)), + + // - Version file v1 exists: version 4, max_version 4 + // - Version file v2 exists: version 4, max_version 4 + // - Flash index exists with version 0 magic + // - Result: version -1, max_version 4 + VersionUtilReadVersionTestParam( + /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>( + /*version_in=*/4, /*max_version=*/4), + /*existing_v2_version_info_in=*/ + std::make_optional<VersionInfo>( + /*version_in=*/4, /*max_version=*/4), + /*existing_flash_index_magic_in=*/ + std::make_optional<int>(kVersionZeroFlashIndexMagic), + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/-1, /*max_version=*/4)), + + // - Version file v1 exists: version 4, max_version 4 + // - Version file v2 exists: version 4, max_version 4 + // - Flash index exists with non version 0 magic + // - Result: version 4, max_version 4 + VersionUtilReadVersionTestParam( + /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>( + /*version_in=*/4, /*max_version=*/4), + /*existing_v2_version_info_in=*/ + std::make_optional<VersionInfo>( + /*version_in=*/4, /*max_version=*/4), + /*existing_flash_index_magic_in=*/ + std::make_optional<int>(kVersionZeroFlashIndexMagic + 1), + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/4, /*max_version=*/4)), + + // - Version file v1 exists: version 4, max_version 4 + // - Version file v2 does not exist + // - Flash index exists with non version 0 magic + // - Result: version -1, max_version 4 + VersionUtilReadVersionTestParam( + /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>( + /*version_in=*/4, /*max_version=*/4), + /*existing_v2_version_info_in=*/std::nullopt, /*existing_flash_index_magic_in=*/ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1), /*expected_version_info_in=*/ - VersionInfo(/*version_in=*/2, /*max_version=*/3)))); + VersionInfo(/*version_in=*/-1, /*max_version=*/4)), + + // - Version file v1 does not exist + // - Version file v2 exists: version 4, max_version 4 + // - Flash index exists with non version 0 magic + // - Result: version -1, max_version -1 + VersionUtilReadVersionTestParam( + /*existing_v1_version_info_in=*/std::nullopt, + /*existing_v2_version_info_in=*/ + std::make_optional<VersionInfo>( + /*version_in=*/4, /*max_version=*/4), + /*existing_flash_index_magic_in=*/ + std::make_optional<int>(kVersionZeroFlashIndexMagic + 1), + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/-1, /*max_version=*/-1)), + + // - Version file v1 doesn't exist + // - Version file v2 exists: version 4, max_version 4 + // - Flash index doesn't exist + // - Result: version -1, max_version -1 (invalid since flash index + // doesn't exist) + VersionUtilReadVersionTestParam( + /*existing_v1_version_info_in=*/std::nullopt, + /*existing_v2_version_info_in=*/ + std::make_optional<VersionInfo>( + /*version_in=*/4, /*max_version=*/4), + /*existing_flash_index_magic_in=*/std::nullopt, + /*expected_version_info_in=*/ + VersionInfo(/*version_in=*/-1, /*max_version=*/-1)))); struct VersionUtilStateChangeTestParam { VersionInfo existing_version_info; @@ -389,6 +527,240 @@ INSTANTIATE_TEST_SUITE_P( /*curr_version_in=*/2, /*expected_state_change_in=*/StateChange::kRollBack))); +struct VersionUtilDerivedFilesRebuildTestParam { + int32_t existing_version; + int32_t max_version; + std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType> + existing_enabled_features; + int32_t curr_version; + std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType> + curr_enabled_features; + DerivedFilesRebuildResult expected_derived_files_rebuild_result; + + explicit VersionUtilDerivedFilesRebuildTestParam( + int32_t existing_version_in, int32_t max_version_in, + std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType> + existing_enabled_features_in, + int32_t curr_version_in, + std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType> + curr_enabled_features_in, + DerivedFilesRebuildResult expected_derived_files_rebuild_result_in) + : existing_version(existing_version_in), + max_version(max_version_in), + existing_enabled_features(std::move(existing_enabled_features_in)), + curr_version(curr_version_in), + curr_enabled_features(std::move(curr_enabled_features_in)), + expected_derived_files_rebuild_result( + std::move(expected_derived_files_rebuild_result_in)) {} +}; + +class VersionUtilDerivedFilesRebuildTest + : public ::testing::TestWithParam<VersionUtilDerivedFilesRebuildTestParam> { +}; + +TEST_P(VersionUtilDerivedFilesRebuildTest, + CalculateRequiredDerivedFilesRebuild) { + const VersionUtilDerivedFilesRebuildTestParam& param = GetParam(); + + EXPECT_THAT(CalculateRequiredDerivedFilesRebuild( + /*prev_version_proto=*/MakeTestVersionProto( + VersionInfo(param.existing_version, param.max_version), + param.existing_enabled_features), + /*curr_version_proto=*/ + MakeTestVersionProto( + VersionInfo(param.curr_version, param.max_version), + param.curr_enabled_features)), + Eq(param.expected_derived_files_rebuild_result)); +} + +INSTANTIATE_TEST_SUITE_P( + VersionUtilDerivedFilesRebuildTest, VersionUtilDerivedFilesRebuildTest, + testing::Values( + // - Existing version -1, max_version -1 (invalid) + // - Existing enabled features = {} + // - Current version = 4 + // - Current enabled features = {} + // + // - Result: rebuild everything + VersionUtilDerivedFilesRebuildTestParam( + /*existing_version_in=*/-1, /*max_version_in=*/-1, + /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4, + /*curr_enabled_features_in=*/{}, + /*expected_derived_files_rebuild_result_in=*/ + DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/true, + /*needs_schema_store_derived_files_rebuild=*/true, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/true, + /*needs_qualified_id_join_index_rebuild=*/true)), + + // - Existing version -1, max_version 2 (invalid) + // - Existing enabled features = {} + // - Current version = 4 + // - Current enabled features = {} + // + // - Result: rebuild everything + VersionUtilDerivedFilesRebuildTestParam( + /*existing_version_in=*/-1, /*max_version_in=*/-1, + /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4, + /*curr_enabled_features_in=*/{}, + /*expected_derived_files_rebuild_result_in=*/ + DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/true, + /*needs_schema_store_derived_files_rebuild=*/true, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/true, + /*needs_qualified_id_join_index_rebuild=*/true)), + + // - Existing version 3, max_version 3 (pre v2 version check) + // - Existing enabled features = {} + // - Current version = 4 + // - Current enabled features = {} + // + // - Result: don't rebuild anything + VersionUtilDerivedFilesRebuildTestParam( + /*existing_version_in=*/3, /*max_version_in=*/3, + /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4, + /*curr_enabled_features_in=*/{}, + /*expected_derived_files_rebuild_result_in=*/ + DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/false, + /*needs_schema_store_derived_files_rebuild=*/false, + /*needs_term_index_rebuild=*/false, + /*needs_integer_index_rebuild=*/false, + /*needs_qualified_id_join_index_rebuild=*/false)), + + // - Existing version 3, max_version 3 (pre v2 version check) + // - Existing enabled features = {} + // - Current version = 4 + // - Current enabled features = {FEATURE_HAS_PROPERTY_OPERATOR} + // + // - Result: rebuild term index + VersionUtilDerivedFilesRebuildTestParam( + /*existing_version_in=*/3, /*max_version_in=*/3, + /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4, + /*curr_enabled_features_in=*/ + {IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR}, + /*expected_derived_files_rebuild_result_in=*/ + DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/false, + /*needs_schema_store_derived_files_rebuild=*/false, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/false, + /*needs_qualified_id_join_index_rebuild=*/false)), + + // - Existing version 4, max_version 4 + // - Existing enabled features = {} + // - Current version = 4 + // - Current enabled features = {} + // + // - Result: don't rebuild anything + VersionUtilDerivedFilesRebuildTestParam( + /*existing_version_in=*/4, /*max_version_in=*/4, + /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4, + /*curr_enabled_features_in=*/{}, + /*expected_derived_files_rebuild_result_in=*/ + DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/false, + /*needs_schema_store_derived_files_rebuild=*/false, + /*needs_term_index_rebuild=*/false, + /*needs_integer_index_rebuild=*/false, + /*needs_qualified_id_join_index_rebuild=*/false)), + + // - Existing version 4, max_version 5 + // - Existing enabled features = {} + // - Current version = 5 + // - Current enabled features = {} + // + // - Result: Rollforward -- rebuild everything + VersionUtilDerivedFilesRebuildTestParam( + /*existing_version_in=*/4, /*max_version_in=*/5, + /*existing_enabled_features_in=*/{}, /*curr_version_in=*/5, + /*curr_enabled_features_in=*/{}, + /*expected_derived_files_rebuild_result_in=*/ + DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/true, + /*needs_schema_store_derived_files_rebuild=*/true, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/true, + /*needs_qualified_id_join_index_rebuild=*/true)), + + // - Existing version 5, max_version 5 + // - Existing enabled features = {} + // - Current version = 4 + // - Current enabled features = {} + // + // - Result: Rollback -- rebuild everything + VersionUtilDerivedFilesRebuildTestParam( + /*existing_version_in=*/5, /*max_version_in=*/5, + /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4, + /*curr_enabled_features_in=*/{}, + /*expected_derived_files_rebuild_result_in=*/ + DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/true, + /*needs_schema_store_derived_files_rebuild=*/true, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/true, + /*needs_qualified_id_join_index_rebuild=*/true)), + + // - Existing version 4, max_version 4 + // - Existing enabled features = {} + // - Current version = 4 + // - Current enabled features = {FEATURE_HAS_PROPERTY_OPERATOR} + // + // - Result: rebuild term index + VersionUtilDerivedFilesRebuildTestParam( + /*existing_version_in=*/4, /*max_version_in=*/4, + /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4, + /*curr_enabled_features_in=*/ + {IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR}, + /*expected_derived_files_rebuild_result_in=*/ + DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/false, + /*needs_schema_store_derived_files_rebuild=*/false, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/false, + /*needs_qualified_id_join_index_rebuild=*/false)), + + // - Existing version 4, max_version 4 + // - Existing enabled features = {FEATURE_HAS_PROPERTY_OPERATOR} + // - Current version = 4 + // - Current enabled features = {} + // + // - Result: rebuild term index + VersionUtilDerivedFilesRebuildTestParam( + /*existing_version_in=*/4, /*max_version_in=*/4, + /*existing_enabled_features_in=*/ + {IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR}, + /*curr_version_in=*/4, /*curr_enabled_features_in=*/{}, + /*expected_derived_files_rebuild_result_in=*/ + DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/false, + /*needs_schema_store_derived_files_rebuild=*/false, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/false, + /*needs_qualified_id_join_index_rebuild=*/false)), + + // - Existing version 4, max_version 4 + // - Existing enabled features = {UNKNOWN} + // - Current version = 4 + // - Current enabled features = {FEATURE_HAS_PROPERTY_OPERATOR} + // + // - Result: rebuild everything + VersionUtilDerivedFilesRebuildTestParam( + /*existing_version_in=*/4, /*max_version_in=*/4, + /*existing_enabled_features_in=*/ + {IcingSearchEngineFeatureInfoProto::UNKNOWN}, /*curr_version_in=*/4, + /*curr_enabled_features_in=*/ + {IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR}, + /*expected_derived_files_rebuild_result_in=*/ + DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/true, + /*needs_schema_store_derived_files_rebuild=*/true, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/true, + /*needs_qualified_id_join_index_rebuild=*/true)))); + TEST(VersionUtilTest, ShouldRebuildDerivedFilesUndeterminedVersion) { EXPECT_THAT( ShouldRebuildDerivedFiles(VersionInfo(-1, -1), /*curr_version=*/1), @@ -475,8 +847,80 @@ TEST(VersionUtilTest, Upgrade) { EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionOne, kVersionOne), /*curr_version=*/kVersionThree), IsFalse()); + + // kVersionThree -> kVersionFour. + EXPECT_THAT( + ShouldRebuildDerivedFiles(VersionInfo(kVersionThree, kVersionThree), + /*curr_version=*/kVersionFour), + IsFalse()); + + // kVersionTwo -> kVersionFour + EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionTwo, kVersionTwo), + /*curr_version=*/kVersionFour), + IsFalse()); + + // kVersionOne -> kVersionFour. + EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionOne, kVersionOne), + /*curr_version=*/kVersionFour), + IsFalse()); +} + +TEST(VersionUtilTest, GetFeatureDerivedFilesRebuildResult_unknown) { + EXPECT_THAT(GetFeatureDerivedFilesRebuildResult( + IcingSearchEngineFeatureInfoProto::UNKNOWN), + Eq(DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/true, + /*needs_schema_store_derived_files_rebuild=*/true, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/true, + /*needs_qualified_id_join_index_rebuild=*/true))); } +TEST(VersionUtilTest, + GetFeatureDerivedFilesRebuildResult_featureHasPropertyOperator) { + EXPECT_THAT( + GetFeatureDerivedFilesRebuildResult( + IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR), + Eq(DerivedFilesRebuildResult( + /*needs_document_store_derived_files_rebuild=*/false, + /*needs_schema_store_derived_files_rebuild=*/false, + /*needs_term_index_rebuild=*/true, + /*needs_integer_index_rebuild=*/false, + /*needs_qualified_id_join_index_rebuild=*/false))); +} + +class VersionUtilFeatureProtoTest + : public ::testing::TestWithParam< + IcingSearchEngineFeatureInfoProto::FlaggedFeatureType> {}; + +TEST_P(VersionUtilFeatureProtoTest, GetFeatureInfoProto) { + IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature_type = + GetParam(); + DerivedFilesRebuildResult rebuild_result = + GetFeatureDerivedFilesRebuildResult(feature_type); + + IcingSearchEngineFeatureInfoProto feature_info = + GetFeatureInfoProto(feature_type); + EXPECT_THAT(feature_info.feature_type(), Eq(feature_type)); + + EXPECT_THAT(feature_info.needs_document_store_rebuild(), + Eq(rebuild_result.needs_document_store_derived_files_rebuild)); + EXPECT_THAT(feature_info.needs_schema_store_rebuild(), + Eq(rebuild_result.needs_schema_store_derived_files_rebuild)); + EXPECT_THAT(feature_info.needs_term_index_rebuild(), + Eq(rebuild_result.needs_term_index_rebuild)); + EXPECT_THAT(feature_info.needs_integer_index_rebuild(), + Eq(rebuild_result.needs_integer_index_rebuild)); + EXPECT_THAT(feature_info.needs_qualified_id_join_index_rebuild(), + Eq(rebuild_result.needs_qualified_id_join_index_rebuild)); +} + +INSTANTIATE_TEST_SUITE_P( + VersionUtilFeatureProtoTest, VersionUtilFeatureProtoTest, + testing::Values( + IcingSearchEngineFeatureInfoProto::UNKNOWN, + IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR)); + } // namespace } // namespace version_util diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index 72be4e9..72e744b 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -14,7 +14,10 @@ #include "icing/icing-search-engine.h" +#include <algorithm> +#include <cstddef> #include <cstdint> +#include <functional> #include <memory> #include <string> #include <string_view> @@ -41,12 +44,16 @@ #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/index/numeric/integer-index.h" #include "icing/index/term-indexing-handler.h" +#include "icing/index/term-metadata.h" +#include "icing/jni/jni-cache.h" +#include "icing/join/join-children-fetcher.h" #include "icing/join/join-processor.h" #include "icing/join/qualified-id-join-index-impl-v1.h" #include "icing/join/qualified-id-join-index-impl-v2.h" #include "icing/join/qualified-id-join-index.h" #include "icing/join/qualified-id-join-indexing-handler.h" #include "icing/legacy/index/icing-filesystem.h" +#include "icing/performance-configuration.h" #include "icing/portable/endian.h" #include "icing/proto/debug.pb.h" #include "icing/proto/document.pb.h" @@ -73,9 +80,8 @@ #include "icing/result/projector.h" #include "icing/result/result-adjustment-info.h" #include "icing/result/result-retriever-v2.h" +#include "icing/result/result-state-manager.h" #include "icing/schema/schema-store.h" -#include "icing/schema/schema-util.h" -#include "icing/schema/section.h" #include "icing/scoring/advanced_scoring/score-expression.h" #include "icing/scoring/priority-queue-scored-document-hits-ranker.h" #include "icing/scoring/scored-document-hit.h" @@ -84,11 +90,8 @@ #include "icing/store/document-id.h" #include "icing/store/document-store.h" #include "icing/tokenization/language-segmenter-factory.h" -#include "icing/tokenization/language-segmenter.h" #include "icing/transform/normalizer-factory.h" -#include "icing/transform/normalizer.h" #include "icing/util/clock.h" -#include "icing/util/crc32.h" #include "icing/util/data-loss.h" #include "icing/util/logging.h" #include "icing/util/status-macros.h" @@ -100,7 +103,6 @@ namespace lib { namespace { -constexpr std::string_view kVersionFilename = "version"; constexpr std::string_view kDocumentSubfolderName = "document_dir"; constexpr std::string_view kIndexSubfolderName = "index_dir"; constexpr std::string_view kIntegerIndexSubfolderName = "integer_index_dir"; @@ -253,12 +255,6 @@ CreateQualifiedIdJoinIndex(const Filesystem& filesystem, } } -// Version file is a single file under base_dir containing version info of the -// existing data. -std::string MakeVersionFilePath(const std::string& base_dir) { - return absl_ports::StrCat(base_dir, "/", kVersionFilename); -} - // Document store files are in a standalone subfolder for easier file // management. We can delete and recreate the subfolder and not touch/affect // anything else. @@ -604,32 +600,49 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( return status; } - // Read version file and determine the state change. - const std::string version_filepath = MakeVersionFilePath(options_.base_dir()); + // Do version and flags compatibility check + // Read version file, determine the state change and rebuild derived files if + // needed. const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir()); ICING_ASSIGN_OR_RETURN( - version_util::VersionInfo version_info, - version_util::ReadVersion(*filesystem_, version_filepath, index_dir)); + IcingSearchEngineVersionProto stored_version_proto, + version_util::ReadVersion( + *filesystem_, /*version_file_dir=*/options_.base_dir(), index_dir)); + version_util::VersionInfo stored_version_info = + version_util::GetVersionInfoFromProto(stored_version_proto); version_util::StateChange version_state_change = - version_util::GetVersionStateChange(version_info); + version_util::GetVersionStateChange(stored_version_info); + + // Construct icing's current version proto based on the current code version + IcingSearchEngineVersionProto current_version_proto; + current_version_proto.set_version(version_util::kVersion); + current_version_proto.set_max_version( + std::max(stored_version_info.max_version, version_util::kVersion)); + version_util::AddEnabledFeatures(options_, ¤t_version_proto); + + // Step 1: If versions are incompatible, migrate schema according to the + // version state change. if (version_state_change != version_util::StateChange::kCompatible) { - // Step 1: migrate schema according to the version state change. ICING_RETURN_IF_ERROR(SchemaStore::MigrateSchema( filesystem_.get(), MakeSchemaDirectoryPath(options_.base_dir()), version_state_change, version_util::kVersion)); + } - // Step 2: discard all derived data if needed rebuild. - if (version_util::ShouldRebuildDerivedFiles(version_info)) { - ICING_RETURN_IF_ERROR(DiscardDerivedFiles()); - } + // Step 2: Discard derived files that need to be rebuilt + version_util::DerivedFilesRebuildResult required_derived_files_rebuild = + version_util::CalculateRequiredDerivedFilesRebuild(stored_version_proto, + current_version_proto); + ICING_RETURN_IF_ERROR(DiscardDerivedFiles(required_derived_files_rebuild)); - // Step 3: update version file - version_util::VersionInfo new_version_info( - version_util::kVersion, - std::max(version_info.max_version, version_util::kVersion)); - ICING_RETURN_IF_ERROR(version_util::WriteVersion( - *filesystem_, version_filepath, new_version_info)); - } + // Step 3: update version files. We need to update both the V1 and V2 + // version files. + ICING_RETURN_IF_ERROR(version_util::WriteV1Version( + *filesystem_, /*version_file_dir=*/options_.base_dir(), + version_util::GetVersionInfoFromProto(current_version_proto))); + ICING_RETURN_IF_ERROR(version_util::WriteV2Version( + *filesystem_, /*version_file_dir=*/options_.base_dir(), + std::make_unique<IcingSearchEngineVersionProto>( + std::move(current_version_proto)))); ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats)); @@ -688,10 +701,9 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( // We're going to need to build the index from scratch. So just delete its // directory now. // Discard index directory and instantiate a new one. - Index::Options index_options( - index_dir, options_.index_merge_size(), - options_.lite_index_sort_at_indexing(), options_.lite_index_sort_size(), - options_.build_property_existence_metadata_hits()); + Index::Options index_options(index_dir, options_.index_merge_size(), + options_.lite_index_sort_at_indexing(), + options_.lite_index_sort_size()); if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) || !filesystem_->CreateDirectoryRecursively(index_dir.c_str())) { return absl_ports::InternalError( @@ -776,6 +788,31 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) { return index_init_status; } + + // Set recovery cause to FEATURE_FLAG_CHANGED according to the calculated + // required_derived_files_rebuild + if (required_derived_files_rebuild + .needs_document_store_derived_files_rebuild) { + initialize_stats->set_document_store_recovery_cause( + InitializeStatsProto::FEATURE_FLAG_CHANGED); + } + if (required_derived_files_rebuild + .needs_schema_store_derived_files_rebuild) { + initialize_stats->set_schema_store_recovery_cause( + InitializeStatsProto::FEATURE_FLAG_CHANGED); + } + if (required_derived_files_rebuild.needs_term_index_rebuild) { + initialize_stats->set_index_restoration_cause( + InitializeStatsProto::FEATURE_FLAG_CHANGED); + } + if (required_derived_files_rebuild.needs_integer_index_rebuild) { + initialize_stats->set_integer_index_restoration_cause( + InitializeStatsProto::FEATURE_FLAG_CHANGED); + } + if (required_derived_files_rebuild.needs_qualified_id_join_index_rebuild) { + initialize_stats->set_qualified_id_join_index_restoration_cause( + InitializeStatsProto::FEATURE_FLAG_CHANGED); + } } if (status.ok()) { @@ -842,10 +879,9 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex( return absl_ports::InternalError( absl_ports::StrCat("Could not create directory: ", index_dir)); } - Index::Options index_options( - index_dir, options_.index_merge_size(), - options_.lite_index_sort_at_indexing(), options_.lite_index_sort_size(), - options_.build_property_existence_metadata_hits()); + Index::Options index_options(index_dir, options_.index_merge_size(), + options_.lite_index_sort_at_indexing(), + options_.lite_index_sort_size()); // Term index InitializeStatsProto::RecoveryCause index_recovery_cause; @@ -1480,7 +1516,8 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( // Gets unordered results from query processor auto query_processor_or = QueryProcessor::Create( index_.get(), integer_index_.get(), language_segmenter_.get(), - normalizer_.get(), document_store_.get(), schema_store_.get()); + normalizer_.get(), document_store_.get(), schema_store_.get(), + clock_.get()); if (!query_processor_or.ok()) { TransformStatus(query_processor_or.status(), result_status); delete_stats->set_parse_query_latency_ms( @@ -1979,6 +2016,7 @@ SearchResultProto IcingSearchEngine::InternalSearch( result_status->set_message("IcingSearchEngine has not been initialized!"); return result_proto; } + index_->PublishQueryStats(query_stats); libtextclassifier3::Status status = ValidateResultSpec(document_store_.get(), result_spec); @@ -2183,7 +2221,8 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore( // Gets unordered results from query processor auto query_processor_or = QueryProcessor::Create( index_.get(), integer_index_.get(), language_segmenter_.get(), - normalizer_.get(), document_store_.get(), schema_store_.get()); + normalizer_.get(), document_store_.get(), schema_store_.get(), + clock_.get()); if (!query_processor_or.ok()) { search_stats->set_parse_query_latency_ms( component_timer->GetElapsedMilliseconds()); @@ -2198,7 +2237,8 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore( libtextclassifier3::StatusOr<QueryResults> query_results_or; if (ranking_strategy_or.ok()) { query_results_or = query_processor->ParseSearch( - search_spec, ranking_strategy_or.ValueOrDie(), current_time_ms); + search_spec, ranking_strategy_or.ValueOrDie(), current_time_ms, + search_stats); } else { query_results_or = ranking_strategy_or.status(); } @@ -2702,7 +2742,12 @@ IcingSearchEngine::TruncateIndicesTo(DocumentId last_stored_document_id) { qualified_id_join_index_needed_restoration); } -libtextclassifier3::Status IcingSearchEngine::DiscardDerivedFiles() { +libtextclassifier3::Status IcingSearchEngine::DiscardDerivedFiles( + const version_util::DerivedFilesRebuildResult& rebuild_result) { + if (!rebuild_result.IsRebuildNeeded()) { + return libtextclassifier3::Status::OK; + } + if (schema_store_ != nullptr || document_store_ != nullptr || index_ != nullptr || integer_index_ != nullptr || qualified_id_join_index_ != nullptr) { @@ -2711,30 +2756,40 @@ libtextclassifier3::Status IcingSearchEngine::DiscardDerivedFiles() { } // Schema store - ICING_RETURN_IF_ERROR( - SchemaStore::DiscardDerivedFiles(filesystem_.get(), options_.base_dir())); + if (rebuild_result.needs_schema_store_derived_files_rebuild) { + ICING_RETURN_IF_ERROR(SchemaStore::DiscardDerivedFiles( + filesystem_.get(), options_.base_dir())); + } // Document store - ICING_RETURN_IF_ERROR(DocumentStore::DiscardDerivedFiles( - filesystem_.get(), options_.base_dir())); + if (rebuild_result.needs_document_store_derived_files_rebuild) { + ICING_RETURN_IF_ERROR(DocumentStore::DiscardDerivedFiles( + filesystem_.get(), options_.base_dir())); + } // Term index - if (!filesystem_->DeleteDirectoryRecursively( - MakeIndexDirectoryPath(options_.base_dir()).c_str())) { - return absl_ports::InternalError("Failed to discard index"); + if (rebuild_result.needs_term_index_rebuild) { + if (!filesystem_->DeleteDirectoryRecursively( + MakeIndexDirectoryPath(options_.base_dir()).c_str())) { + return absl_ports::InternalError("Failed to discard index"); + } } // Integer index - if (!filesystem_->DeleteDirectoryRecursively( - MakeIntegerIndexWorkingPath(options_.base_dir()).c_str())) { - return absl_ports::InternalError("Failed to discard integer index"); + if (rebuild_result.needs_integer_index_rebuild) { + if (!filesystem_->DeleteDirectoryRecursively( + MakeIntegerIndexWorkingPath(options_.base_dir()).c_str())) { + return absl_ports::InternalError("Failed to discard integer index"); + } } // Qualified id join index - if (!filesystem_->DeleteDirectoryRecursively( - MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir()).c_str())) { - return absl_ports::InternalError( - "Failed to discard qualified id join index"); + if (rebuild_result.needs_qualified_id_join_index_rebuild) { + if (!filesystem_->DeleteDirectoryRecursively( + MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir()).c_str())) { + return absl_ports::InternalError( + "Failed to discard qualified id join index"); + } } return libtextclassifier3::Status::OK; @@ -2816,7 +2871,8 @@ SuggestionResponse IcingSearchEngine::SearchSuggestions( // Create the suggestion processor. auto suggestion_processor_or = SuggestionProcessor::Create( index_.get(), integer_index_.get(), language_segmenter_.get(), - normalizer_.get(), document_store_.get(), schema_store_.get()); + normalizer_.get(), document_store_.get(), schema_store_.get(), + clock_.get()); if (!suggestion_processor_or.ok()) { TransformStatus(suggestion_processor_or.status(), response_status); return response; diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h index d316350..b9df95d 100644 --- a/icing/icing-search-engine.h +++ b/icing/icing-search-engine.h @@ -17,7 +17,6 @@ #include <cstdint> #include <memory> -#include <string> #include <string_view> #include <utility> #include <vector> @@ -27,6 +26,7 @@ #include "icing/absl_ports/mutex.h" #include "icing/absl_ports/thread_annotations.h" #include "icing/file/filesystem.h" +#include "icing/file/version-util.h" #include "icing/index/data-indexing-handler.h" #include "icing/index/index.h" #include "icing/index/numeric/numeric-index.h" @@ -51,11 +51,11 @@ #include "icing/result/result-state-manager.h" #include "icing/schema/schema-store.h" #include "icing/scoring/scored-document-hit.h" +#include "icing/store/document-id.h" #include "icing/store/document-store.h" #include "icing/tokenization/language-segmenter.h" #include "icing/transform/normalizer.h" #include "icing/util/clock.h" -#include "icing/util/crc32.h" namespace icing { namespace lib { @@ -578,8 +578,8 @@ class IcingSearchEngine { // read-lock, allowing for parallel non-exclusive operations. // This implementation is used if search_spec.use_read_only_search is true. SearchResultProto SearchLockedShared(const SearchSpecProto& search_spec, - const ScoringSpecProto& scoring_spec, - const ResultSpecProto& result_spec) + const ScoringSpecProto& scoring_spec, + const ResultSpecProto& result_spec) ICING_LOCKS_EXCLUDED(mutex_); // Implementation of IcingSearchEngine::Search that requires the overall @@ -587,8 +587,8 @@ class IcingSearchEngine { // this version is used. // This implementation is used if search_spec.use_read_only_search is false. SearchResultProto SearchLockedExclusive(const SearchSpecProto& search_spec, - const ScoringSpecProto& scoring_spec, - const ResultSpecProto& result_spec) + const ScoringSpecProto& scoring_spec, + const ResultSpecProto& result_spec) ICING_LOCKS_EXCLUDED(mutex_); // Helper method for the actual work to Search. We need this separate @@ -641,13 +641,14 @@ class IcingSearchEngine { libtextclassifier3::Status CheckConsistency() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); - // Discards all derived data. + // Discards derived data that requires rebuild based on rebuild_result. // // Returns: // OK on success // FAILED_PRECONDITION_ERROR if those instances are valid (non nullptr) // INTERNAL_ERROR on any I/O errors - libtextclassifier3::Status DiscardDerivedFiles() + libtextclassifier3::Status DiscardDerivedFiles( + const version_util::DerivedFilesRebuildResult& rebuild_result) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Repopulates derived data off our ground truths. diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc index 122e4af..d6316d4 100644 --- a/icing/icing-search-engine_initialization_test.cc +++ b/icing/icing-search-engine_initialization_test.cc @@ -19,6 +19,7 @@ #include <string> #include <string_view> #include <tuple> +#include <unordered_set> #include <utility> #include <vector> @@ -204,7 +205,7 @@ class IcingSearchEngineInitializationTest : public testing::Test { // Non-zero value so we don't override it to be the current time constexpr int64_t kDefaultCreationTimestampMs = 1575492852000; -std::string GetVersionFilename() { return GetTestBaseDir() + "/version"; } +std::string GetVersionFileDir() { return GetTestBaseDir(); } std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; } @@ -5566,12 +5567,26 @@ INSTANTIATE_TEST_SUITE_P(IcingSearchEngineInitializationSwitchJoinIndexTest, IcingSearchEngineInitializationSwitchJoinIndexTest, testing::Values(true, false)); +struct IcingSearchEngineInitializationVersionChangeTestParam { + version_util::VersionInfo existing_version_info; + std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType> + existing_enabled_features; + + explicit IcingSearchEngineInitializationVersionChangeTestParam( + version_util::VersionInfo version_info_in, + std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType> + existing_enabled_features_in) + : existing_version_info(std::move(version_info_in)), + existing_enabled_features(std::move(existing_enabled_features_in)) {} +}; + class IcingSearchEngineInitializationVersionChangeTest : public IcingSearchEngineInitializationTest, - public ::testing::WithParamInterface<version_util::VersionInfo> {}; + public ::testing::WithParamInterface< + IcingSearchEngineInitializationVersionChangeTestParam> {}; TEST_P(IcingSearchEngineInitializationVersionChangeTest, - RecoverFromVersionChange) { + RecoverFromVersionChangeOrUnknownFlagChange) { // TODO(b/280697513): test backup schema migration // Test the following scenario: version change. All derived data should be // rebuilt. We test this by manually adding some invalid derived data and @@ -5725,10 +5740,27 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest, std::move(incorrect_message))); ICING_ASSERT_OK(index_processor.IndexDocument(tokenized_document, doc_id)); - // Change existing data's version file - const version_util::VersionInfo& existing_version_info = GetParam(); - ICING_ASSERT_OK(version_util::WriteVersion( - *filesystem(), GetVersionFilename(), existing_version_info)); + // Rewrite existing data's version files + ICING_ASSERT_OK( + version_util::DiscardVersionFiles(*filesystem(), GetVersionFileDir())); + const version_util::VersionInfo& existing_version_info = + GetParam().existing_version_info; + ICING_ASSERT_OK(version_util::WriteV1Version( + *filesystem(), GetVersionFileDir(), existing_version_info)); + + if (existing_version_info.version >= version_util::kFirstV2Version) { + IcingSearchEngineVersionProto version_proto; + version_proto.set_version(existing_version_info.version); + version_proto.set_max_version(existing_version_info.max_version); + auto* enabled_features = version_proto.mutable_enabled_features(); + for (const auto& feature : GetParam().existing_enabled_features) { + enabled_features->Add(version_util::GetFeatureInfoProto(feature)); + } + version_util::WriteV2Version( + *filesystem(), GetVersionFileDir(), + std::make_unique<IcingSearchEngineVersionProto>( + std::move(version_proto))); + } } // Mock filesystem to observe and check the behavior of all indices. @@ -5738,28 +5770,48 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest, std::make_unique<FakeClock>(), GetTestJniCache()); InitializeResultProto initialize_result = icing.Initialize(); EXPECT_THAT(initialize_result.status(), ProtoIsOk()); - // Index Restoration should be triggered here. Incorrect data should be - // deleted and correct data of message should be indexed. + + // Derived files restoration should be triggered here. Incorrect data should + // be deleted and correct data of message should be indexed. + // Here we're recovering from a version change or a flag change that requires + // rebuilding all derived files. + // + // TODO(b/314816301): test individual derived files rebuilds due to change + // in trunk stable feature flags. + // i.e. Test individual rebuilding for each of: + // - document store + // - schema store + // - term index + // - numeric index + // - qualified id join index + InitializeStatsProto::RecoveryCause expected_recovery_cause = + GetParam().existing_version_info.version != version_util::kVersion + ? InitializeStatsProto::VERSION_CHANGED + : InitializeStatsProto::FEATURE_FLAG_CHANGED; EXPECT_THAT( initialize_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::VERSION_CHANGED)); + Eq(expected_recovery_cause)); + EXPECT_THAT( + initialize_result.initialize_stats().schema_store_recovery_cause(), + Eq(expected_recovery_cause)); EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), - Eq(InitializeStatsProto::VERSION_CHANGED)); + Eq(expected_recovery_cause)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), - Eq(InitializeStatsProto::VERSION_CHANGED)); + Eq(expected_recovery_cause)); EXPECT_THAT(initialize_result.initialize_stats() .qualified_id_join_index_restoration_cause(), - Eq(InitializeStatsProto::VERSION_CHANGED)); + Eq(expected_recovery_cause)); // Manually check version file ICING_ASSERT_OK_AND_ASSIGN( - version_util::VersionInfo version_info_after_init, - version_util::ReadVersion(*filesystem(), GetVersionFilename(), + IcingSearchEngineVersionProto version_proto_after_init, + version_util::ReadVersion(*filesystem(), GetVersionFileDir(), GetIndexDir())); - EXPECT_THAT(version_info_after_init.version, Eq(version_util::kVersion)); - EXPECT_THAT(version_info_after_init.max_version, - Eq(std::max(version_util::kVersion, GetParam().max_version))); + EXPECT_THAT(version_proto_after_init.version(), Eq(version_util::kVersion)); + EXPECT_THAT(version_proto_after_init.max_version(), + Eq(std::max(version_util::kVersion, + GetParam().existing_version_info.max_version))); SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); @@ -5836,9 +5888,11 @@ INSTANTIATE_TEST_SUITE_P( testing::Values( // Manually change existing data set's version to kVersion + 1. When // initializing, it will detect "rollback". - version_util::VersionInfo( - /*version_in=*/version_util::kVersion + 1, - /*max_version_in=*/version_util::kVersion + 1), + IcingSearchEngineInitializationVersionChangeTestParam( + version_util::VersionInfo( + /*version_in=*/version_util::kVersion + 1, + /*max_version_in=*/version_util::kVersion + 1), + /*existing_enabled_features_in=*/{}), // Currently we don't have any "upgrade" that requires rebuild derived // files, so skip this case until we have a case for it. @@ -5846,27 +5900,45 @@ INSTANTIATE_TEST_SUITE_P( // Manually change existing data set's version to kVersion - 1 and // max_version to kVersion. When initializing, it will detect "roll // forward". - version_util::VersionInfo( - /*version_in=*/version_util::kVersion - 1, - /*max_version_in=*/version_util::kVersion), + IcingSearchEngineInitializationVersionChangeTestParam( + version_util::VersionInfo( + /*version_in=*/version_util::kVersion - 1, + /*max_version_in=*/version_util::kVersion), + /*existing_enabled_features_in=*/{}), // Manually change existing data set's version to 0 and max_version to // 0. When initializing, it will detect "version 0 upgrade". // // Note: in reality, version 0 won't be written into version file, but // it is ok here since it is hack to simulate version 0 situation. - version_util::VersionInfo( - /*version_in=*/0, - /*max_version_in=*/0), + IcingSearchEngineInitializationVersionChangeTestParam( + version_util::VersionInfo( + /*version_in=*/0, + /*max_version_in=*/0), + /*existing_enabled_features_in=*/{}), // Manually change existing data set's version to 0 and max_version to // kVersion. When initializing, it will detect "version 0 roll forward". // // Note: in reality, version 0 won't be written into version file, but // it is ok here since it is hack to simulate version 0 situation. - version_util::VersionInfo( - /*version_in=*/0, - /*max_version_in=*/version_util::kVersion))); + IcingSearchEngineInitializationVersionChangeTestParam( + version_util::VersionInfo( + /*version_in=*/0, + /*max_version_in=*/version_util::kVersion), + /*existing_enabled_features_in=*/{}), + + // Manually write an unknown feature in the version proto while keeping + // version the same as kVersion. + // + // Result: this will rebuild all derived files with restoration cause + // FEATURE_FLAG_CHANGED + IcingSearchEngineInitializationVersionChangeTestParam( + version_util::VersionInfo( + /*version_in=*/version_util::kVersion, + /*max_version_in=*/version_util::kVersion), + /*existing_enabled_features_in=*/{ + IcingSearchEngineFeatureInfoProto::UNKNOWN}))); class IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest : public IcingSearchEngineInitializationTest, @@ -5962,7 +6034,7 @@ TEST_P(IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest, ASSERT_THAT(initialize_result.status(), ProtoIsOk()); // Ensure that the term index is rebuilt if the flag is changed. EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), - Eq(flag_changed ? InitializeStatsProto::IO_ERROR + Eq(flag_changed ? InitializeStatsProto::FEATURE_FLAG_CHANGED : InitializeStatsProto::NONE)); EXPECT_THAT( initialize_result.initialize_stats().integer_index_restoration_cause(), diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc index 21512c6..22f42e7 100644 --- a/icing/icing-search-engine_search_test.cc +++ b/icing/icing-search-engine_search_test.cc @@ -4496,6 +4496,11 @@ TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) { exp_stats.set_document_retrieval_latency_ms(5); exp_stats.set_lock_acquisition_latency_ms(5); exp_stats.set_num_joined_results_returned_current_page(0); + // document4, document5's hits will remain in the lite index (# of hits: 4). + exp_stats.set_lite_index_hit_buffer_byte_size(4 * + sizeof(TermIdHitPair::Value)); + exp_stats.set_lite_index_hit_buffer_unsorted_byte_size( + 4 * sizeof(TermIdHitPair::Value)); QueryStatsProto::SearchStats* exp_parent_search_stats = exp_stats.mutable_parent_search_stats(); @@ -4511,6 +4516,14 @@ TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) { exp_parent_search_stats->set_num_fetched_hits_lite_index(2); exp_parent_search_stats->set_num_fetched_hits_main_index(3); exp_parent_search_stats->set_num_fetched_hits_integer_index(0); + if (GetParam() == + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) { + exp_parent_search_stats->set_query_processor_lexer_extract_token_latency_ms( + 5); + exp_parent_search_stats + ->set_query_processor_parser_consume_query_latency_ms(5); + exp_parent_search_stats->set_query_processor_query_visitor_latency_ms(5); + } EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); @@ -4769,6 +4782,11 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) { exp_stats.set_num_joined_results_returned_current_page(3); exp_stats.set_join_latency_ms(5); exp_stats.set_is_join_query(true); + // person3, email4's hits will remain in the lite index (# of hits: 5). + exp_stats.set_lite_index_hit_buffer_byte_size(5 * + sizeof(TermIdHitPair::Value)); + exp_stats.set_lite_index_hit_buffer_unsorted_byte_size( + 5 * sizeof(TermIdHitPair::Value)); QueryStatsProto::SearchStats* exp_parent_search_stats = exp_stats.mutable_parent_search_stats(); @@ -4784,6 +4802,14 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) { exp_parent_search_stats->set_num_fetched_hits_lite_index(1); exp_parent_search_stats->set_num_fetched_hits_main_index(2); exp_parent_search_stats->set_num_fetched_hits_integer_index(0); + if (GetParam() == + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) { + exp_parent_search_stats->set_query_processor_lexer_extract_token_latency_ms( + 5); + exp_parent_search_stats + ->set_query_processor_parser_consume_query_latency_ms(5); + exp_parent_search_stats->set_query_processor_query_visitor_latency_ms(5); + } QueryStatsProto::SearchStats* exp_child_search_stats = exp_stats.mutable_child_search_stats(); @@ -4799,6 +4825,14 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) { exp_child_search_stats->set_num_fetched_hits_lite_index(1); exp_child_search_stats->set_num_fetched_hits_main_index(3); exp_child_search_stats->set_num_fetched_hits_integer_index(0); + if (GetParam() == + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) { + exp_child_search_stats->set_query_processor_lexer_extract_token_latency_ms( + 5); + exp_child_search_stats->set_query_processor_parser_consume_query_latency_ms( + 5); + exp_child_search_stats->set_query_processor_query_visitor_latency_ms(5); + } EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); @@ -6638,6 +6672,8 @@ TEST_F(IcingSearchEngineSearchTest, NumericFilterQueryStatsProtoTest) { exp_stats.set_document_retrieval_latency_ms(5); exp_stats.set_lock_acquisition_latency_ms(5); exp_stats.set_num_joined_results_returned_current_page(0); + exp_stats.set_lite_index_hit_buffer_byte_size(0); + exp_stats.set_lite_index_hit_buffer_unsorted_byte_size(0); QueryStatsProto::SearchStats* exp_parent_search_stats = exp_stats.mutable_parent_search_stats(); @@ -6656,6 +6692,11 @@ TEST_F(IcingSearchEngineSearchTest, NumericFilterQueryStatsProtoTest) { // Since we will inspect 1 bucket from "price" in integer index and it // contains 3 hits, we will fetch 3 hits (but filter out one of them). exp_parent_search_stats->set_num_fetched_hits_integer_index(3); + exp_parent_search_stats->set_query_processor_lexer_extract_token_latency_ms( + 5); + exp_parent_search_stats->set_query_processor_parser_consume_query_latency_ms( + 5); + exp_parent_search_stats->set_query_processor_query_visitor_latency_ms(5); EXPECT_THAT(results.query_stats(), EqualsProto(exp_stats)); } diff --git a/icing/index/index.cc b/icing/index/index.cc index 98058be..31dcc7e 100644 --- a/icing/index/index.cc +++ b/icing/index/index.cc @@ -65,10 +65,10 @@ libtextclassifier3::StatusOr<LiteIndex::Options> CreateLiteIndexOptions( "Requested hit buffer size %d is too large.", options.index_merge_size)); } - return LiteIndex::Options( - options.base_dir + "/idx/lite.", options.index_merge_size, - options.lite_index_sort_at_indexing, options.lite_index_sort_size, - options.include_property_existence_metadata_hits); + return LiteIndex::Options(options.base_dir + "/idx/lite.", + options.index_merge_size, + options.lite_index_sort_at_indexing, + options.lite_index_sort_size); } std::string MakeMainIndexFilepath(const std::string& base_dir) { diff --git a/icing/index/index.h b/icing/index/index.h index a5d75c4..a09e28f 100644 --- a/icing/index/index.h +++ b/icing/index/index.h @@ -35,6 +35,7 @@ #include "icing/index/term-metadata.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/proto/debug.pb.h" +#include "icing/proto/logging.pb.h" #include "icing/proto/scoring.pb.h" #include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" @@ -72,20 +73,16 @@ class Index { struct Options { explicit Options(const std::string& base_dir, uint32_t index_merge_size, bool lite_index_sort_at_indexing, - uint32_t lite_index_sort_size, - bool include_property_existence_metadata_hits = false) + uint32_t lite_index_sort_size) : base_dir(base_dir), index_merge_size(index_merge_size), lite_index_sort_at_indexing(lite_index_sort_at_indexing), - lite_index_sort_size(lite_index_sort_size), - include_property_existence_metadata_hits( - include_property_existence_metadata_hits) {} + lite_index_sort_size(lite_index_sort_size) {} std::string base_dir; int32_t index_merge_size; bool lite_index_sort_at_indexing; int32_t lite_index_sort_size; - bool include_property_existence_metadata_hits; }; // Creates an instance of Index in the directory pointed by file_dir. @@ -178,6 +175,13 @@ class Index { return debug_info; } + void PublishQueryStats(QueryStatsProto* query_stats) const { + query_stats->set_lite_index_hit_buffer_byte_size( + lite_index_->GetHitBufferByteSize()); + query_stats->set_lite_index_hit_buffer_unsorted_byte_size( + lite_index_->GetHitBufferUnsortedByteSize()); + } + // Returns the byte size of the all the elements held in the index. This // excludes the size of any internal metadata of the index, e.g. the index's // header. @@ -301,9 +305,7 @@ class Index { } // Sorts the LiteIndex HitBuffer. - void SortLiteIndex() { - lite_index_->SortHits(); - } + void SortLiteIndex() { lite_index_->SortHits(); } // Reduces internal file sizes by reclaiming space of deleted documents. // new_last_added_document_id will be used to update the last added document diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc index 04a6bb7..ab550e1 100644 --- a/icing/index/index_test.cc +++ b/icing/index/index_test.cc @@ -33,9 +33,11 @@ #include "icing/file/filesystem.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/iterator/doc-hit-info-iterator.h" +#include "icing/index/lite/term-id-hit-pair.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/legacy/index/icing-mock-filesystem.h" #include "icing/proto/debug.pb.h" +#include "icing/proto/logging.pb.h" #include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/section.h" @@ -2739,6 +2741,41 @@ TEST_F(IndexTest, IndexStorageInfoProto) { EXPECT_THAT(storage_info.min_free_fraction(), Ge(0)); } +TEST_F(IndexTest, PublishQueryStats) { + // Add two documents to the lite index without merging. + Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2, + TermMatchType::PREFIX, /*namespace_id=*/0); + ASSERT_THAT(edit.BufferTerm("foo"), IsOk()); + EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); + edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::PREFIX, + /*namespace_id=*/0); + ASSERT_THAT(edit.BufferTerm("foul"), IsOk()); + EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); + + // Verify query stats. + QueryStatsProto query_stats1; + index_->PublishQueryStats(&query_stats1); + EXPECT_THAT(query_stats1.lite_index_hit_buffer_byte_size(), + Eq(2 * sizeof(TermIdHitPair::Value))); + EXPECT_THAT(query_stats1.lite_index_hit_buffer_unsorted_byte_size(), + Ge(2 * sizeof(TermIdHitPair::Value))); + + // Sort lite index. + index_->SortLiteIndex(); + QueryStatsProto query_stats2; + index_->PublishQueryStats(&query_stats2); + EXPECT_THAT(query_stats2.lite_index_hit_buffer_byte_size(), + Eq(2 * sizeof(TermIdHitPair::Value))); + EXPECT_THAT(query_stats2.lite_index_hit_buffer_unsorted_byte_size(), Eq(0)); + + // Merge lite index to main index. + ICING_ASSERT_OK(index_->Merge()); + QueryStatsProto query_stats3; + index_->PublishQueryStats(&query_stats3); + EXPECT_THAT(query_stats3.lite_index_hit_buffer_byte_size(), Eq(0)); + EXPECT_THAT(query_stats3.lite_index_hit_buffer_unsorted_byte_size(), Eq(0)); +} + } // namespace } // namespace lib diff --git a/icing/index/lite/lite-index-header.h b/icing/index/lite/lite-index-header.h index 75de8fa..6208cb6 100644 --- a/icing/index/lite/lite-index-header.h +++ b/icing/index/lite/lite-index-header.h @@ -53,14 +53,7 @@ class LiteIndex_Header { class LiteIndex_HeaderImpl : public LiteIndex_Header { public: struct HeaderData { - static uint32_t GetCurrentMagic( - bool include_property_existence_metadata_hits) { - if (!include_property_existence_metadata_hits) { - return 0x01c61418; - } else { - return 0x56e07d5b; - } - } + static const uint32_t kMagic = 0x01c61418; uint32_t lite_index_crc; uint32_t magic; @@ -76,15 +69,10 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header { uint32_t searchable_end; }; - explicit LiteIndex_HeaderImpl(HeaderData *hdr, - bool include_property_existence_metadata_hits) - : hdr_(hdr), - include_property_existence_metadata_hits_( - include_property_existence_metadata_hits) {} + explicit LiteIndex_HeaderImpl(HeaderData *hdr) : hdr_(hdr) {} bool check_magic() const override { - return hdr_->magic == HeaderData::GetCurrentMagic( - include_property_existence_metadata_hits_); + return hdr_->magic == HeaderData::kMagic; } uint32_t lite_index_crc() const override { return hdr_->lite_index_crc; } @@ -111,8 +99,7 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header { void Reset() override { hdr_->lite_index_crc = 0; - hdr_->magic = - HeaderData::GetCurrentMagic(include_property_existence_metadata_hits_); + hdr_->magic = HeaderData::kMagic; hdr_->last_added_docid = kInvalidDocumentId; hdr_->cur_size = 0; hdr_->searchable_end = 0; @@ -120,7 +107,6 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header { private: HeaderData *hdr_; - bool include_property_existence_metadata_hits_; }; static_assert(24 == sizeof(LiteIndex_HeaderImpl::HeaderData), "sizeof(HeaderData) != 24"); diff --git a/icing/index/lite/lite-index-options.cc b/icing/index/lite/lite-index-options.cc index 7e6c076..b4810ea 100644 --- a/icing/index/lite/lite-index-options.cc +++ b/icing/index/lite/lite-index-options.cc @@ -69,16 +69,14 @@ IcingDynamicTrie::Options CalculateTrieOptions(uint32_t hit_buffer_size) { } // namespace -LiteIndexOptions::LiteIndexOptions( - const std::string& filename_base, uint32_t hit_buffer_want_merge_bytes, - bool hit_buffer_sort_at_indexing, uint32_t hit_buffer_sort_threshold_bytes, - bool include_property_existence_metadata_hits) +LiteIndexOptions::LiteIndexOptions(const std::string& filename_base, + uint32_t hit_buffer_want_merge_bytes, + bool hit_buffer_sort_at_indexing, + uint32_t hit_buffer_sort_threshold_bytes) : filename_base(filename_base), hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes), hit_buffer_sort_at_indexing(hit_buffer_sort_at_indexing), - hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes), - include_property_existence_metadata_hits( - include_property_existence_metadata_hits) { + hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes) { hit_buffer_size = CalculateHitBufferSize(hit_buffer_want_merge_bytes); lexicon_options = CalculateTrieOptions(hit_buffer_size); display_mappings_options = CalculateTrieOptions(hit_buffer_size); diff --git a/icing/index/lite/lite-index-options.h b/icing/index/lite/lite-index-options.h index 8b03449..5eae5c7 100644 --- a/icing/index/lite/lite-index-options.h +++ b/icing/index/lite/lite-index-options.h @@ -32,8 +32,7 @@ struct LiteIndexOptions { LiteIndexOptions(const std::string& filename_base, uint32_t hit_buffer_want_merge_bytes, bool hit_buffer_sort_at_indexing, - uint32_t hit_buffer_sort_threshold_bytes, - bool include_property_existence_metadata_hits = false); + uint32_t hit_buffer_sort_threshold_bytes); IcingDynamicTrie::Options lexicon_options; IcingDynamicTrie::Options display_mappings_options; @@ -43,7 +42,6 @@ struct LiteIndexOptions { uint32_t hit_buffer_size = 0; bool hit_buffer_sort_at_indexing = false; uint32_t hit_buffer_sort_threshold_bytes = 0; - bool include_property_existence_metadata_hits = false; }; } // namespace lib diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc index 3f9cc93..4c5cb37 100644 --- a/icing/index/lite/lite-index.cc +++ b/icing/index/lite/lite-index.cc @@ -168,8 +168,7 @@ libtextclassifier3::Status LiteIndex::Initialize() { header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size()); header_ = std::make_unique<LiteIndex_HeaderImpl>( reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>( - header_mmap_.address()), - options_.include_property_existence_metadata_hits); + header_mmap_.address())); header_->Reset(); if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true, @@ -184,8 +183,7 @@ libtextclassifier3::Status LiteIndex::Initialize() { header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size()); header_ = std::make_unique<LiteIndex_HeaderImpl>( reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>( - header_mmap_.address()), - options_.include_property_existence_metadata_hits); + header_mmap_.address())); if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true, sizeof(TermIdHitPair::Value), header_->cur_size(), @@ -499,7 +497,7 @@ int LiteIndex::FetchHits( // When disabled, the entire HitBuffer should be sorted already and only // binary search is needed. if (options_.hit_buffer_sort_at_indexing) { - uint32_t unsorted_length = header_->cur_size() - header_->searchable_end(); + uint32_t unsorted_length = GetHitBufferUnsortedSizeImpl(); for (uint32_t i = 1; i <= unsorted_length; ++i) { TermIdHitPair term_id_hit_pair = array[header_->cur_size() - i]; if (term_id_hit_pair.term_id() == term_id) { @@ -607,13 +605,13 @@ IndexStorageInfoProto LiteIndex::GetStorageInfo( void LiteIndex::SortHitsImpl() { // Make searchable by sorting by hit buffer. - uint32_t sort_len = header_->cur_size() - header_->searchable_end(); - if (sort_len <= 0) { + uint32_t need_sort_len = GetHitBufferUnsortedSizeImpl(); + if (need_sort_len <= 0) { return; } IcingTimer timer; - auto* array_start = + TermIdHitPair::Value* array_start = hit_buffer_.GetMutableMem<TermIdHitPair::Value>(0, header_->cur_size()); TermIdHitPair::Value* sort_start = array_start + header_->searchable_end(); std::sort(sort_start, array_start + header_->cur_size()); @@ -625,7 +623,7 @@ void LiteIndex::SortHitsImpl() { std::inplace_merge(array_start, array_start + header_->searchable_end(), array_start + header_->cur_size()); } - ICING_VLOG(2) << "Lite index sort and merge " << sort_len << " into " + ICING_VLOG(2) << "Lite index sort and merge " << need_sort_len << " into " << header_->searchable_end() << " in " << timer.Elapsed() * 1000 << "ms"; diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h index 288602a..45dc280 100644 --- a/icing/index/lite/lite-index.h +++ b/icing/index/lite/lite-index.h @@ -308,6 +308,24 @@ class LiteIndex { IndexStorageInfoProto GetStorageInfo(IndexStorageInfoProto storage_info) const ICING_LOCKS_EXCLUDED(mutex_); + // Returns the size of unsorted part of hit_buffer_. + uint32_t GetHitBufferByteSize() const ICING_LOCKS_EXCLUDED(mutex_) { + absl_ports::shared_lock l(&mutex_); + return size_impl() * sizeof(TermIdHitPair::Value); + } + + // Returns the size of unsorted part of hit_buffer_. + uint32_t GetHitBufferUnsortedSize() const ICING_LOCKS_EXCLUDED(mutex_) { + absl_ports::shared_lock l(&mutex_); + return GetHitBufferUnsortedSizeImpl(); + } + + // Returns the byte size of unsorted part of hit_buffer_. + uint64_t GetHitBufferUnsortedByteSize() const ICING_LOCKS_EXCLUDED(mutex_) { + absl_ports::shared_lock l(&mutex_); + return GetHitBufferUnsortedSizeImpl() * sizeof(TermIdHitPair::Value); + } + // Reduces internal file sizes by reclaiming space of deleted documents. // // This method also sets the last_added_docid of the index to @@ -377,13 +395,13 @@ class LiteIndex { bool NeedSortAtQuerying() const ICING_SHARED_LOCKS_REQUIRED(mutex_) { return HasUnsortedHitsExceedingSortThresholdImpl() || (!options_.hit_buffer_sort_at_indexing && - header_->cur_size() - header_->searchable_end() > 0); + GetHitBufferUnsortedSizeImpl() > 0); } // Non-locking implementation for HasUnsortedHitsExceedingSortThresholdImpl(). bool HasUnsortedHitsExceedingSortThresholdImpl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) { - return header_->cur_size() - header_->searchable_end() >= + return GetHitBufferUnsortedSizeImpl() >= (options_.hit_buffer_sort_threshold_bytes / sizeof(TermIdHitPair::Value)); } @@ -408,6 +426,12 @@ class LiteIndex { std::vector<Hit::TermFrequencyArray>* term_frequency_out) const ICING_SHARED_LOCKS_REQUIRED(mutex_); + // Returns the size of unsorted part of hit_buffer_. + uint32_t GetHitBufferUnsortedSizeImpl() const + ICING_SHARED_LOCKS_REQUIRED(mutex_) { + return header_->cur_size() - header_->searchable_end(); + } + // File descriptor that points to where the header and hit buffer are written // to. ScopedFd hit_buffer_fd_ ICING_GUARDED_BY(mutex_); diff --git a/icing/index/lite/lite-index_test.cc b/icing/index/lite/lite-index_test.cc index 9811fa2..7ff006e 100644 --- a/icing/index/lite/lite-index_test.cc +++ b/icing/index/lite/lite-index_test.cc @@ -28,6 +28,7 @@ #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/index/lite/doc-hit-info-iterator-term-lite.h" #include "icing/index/lite/lite-index-header.h" +#include "icing/index/lite/term-id-hit-pair.h" #include "icing/index/term-id-codec.h" #include "icing/legacy/index/icing-dynamic-trie.h" #include "icing/legacy/index/icing-filesystem.h" @@ -49,6 +50,7 @@ using ::testing::Eq; using ::testing::IsEmpty; using ::testing::IsFalse; using ::testing::IsTrue; +using ::testing::Pointee; using ::testing::SizeIs; class LiteIndexTest : public testing::Test { @@ -113,18 +115,12 @@ TEST_F(LiteIndexTest, ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0)); ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1)); + // Check the total size and unsorted size of the hit buffer. + EXPECT_THAT(lite_index, Pointee(SizeIs(4))); + EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(4)); // Check that unsorted hits does not exceed the sort threshold. EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse()); - // Check that hits are unsorted. Persist the data and pread from - // LiteIndexHeader. - ASSERT_THAT(lite_index->PersistToDisk(), IsOk()); - LiteIndex_HeaderImpl::HeaderData header_data; - ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(), - &header_data, sizeof(header_data), - LiteIndex::kHeaderFileOffset)); - EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4)); - // Query the LiteIndex std::vector<DocHitInfo> hits1; lite_index->FetchHits( @@ -148,13 +144,10 @@ TEST_F(LiteIndexTest, // checker. EXPECT_THAT(hits2, IsEmpty()); - // Check that hits are sorted after querying LiteIndex. Persist the data and - // pread from LiteIndexHeader. - ASSERT_THAT(lite_index->PersistToDisk(), IsOk()); - ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(), - &header_data, sizeof(header_data), - LiteIndex::kHeaderFileOffset)); - EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0)); + // Check the total size and unsorted size of the hit buffer. Hits should be + // sorted after querying LiteIndex. + EXPECT_THAT(lite_index, Pointee(SizeIs(4))); + EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(0)); } TEST_F(LiteIndexTest, @@ -203,18 +196,12 @@ TEST_F(LiteIndexTest, ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0)); ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1)); + // Check the total size and unsorted size of the hit buffer. + EXPECT_THAT(lite_index, Pointee(SizeIs(4))); + EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(4)); // Check that unsorted hits does not exceed the sort threshold. EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse()); - // Check that hits are unsorted. Persist the data and pread from - // LiteIndexHeader. - ASSERT_THAT(lite_index->PersistToDisk(), IsOk()); - LiteIndex_HeaderImpl::HeaderData header_data; - ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(), - &header_data, sizeof(header_data), - LiteIndex::kHeaderFileOffset)); - EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4)); - // Query the LiteIndex std::vector<DocHitInfo> hits1; lite_index->FetchHits( @@ -238,15 +225,11 @@ TEST_F(LiteIndexTest, // checker. EXPECT_THAT(hits2, IsEmpty()); - // Check that hits are still unsorted after querying LiteIndex because the - // HitBuffer unsorted size is still below the sort threshold, and we've - // enabled sort_at_indexing. - // Persist the data and performing a pread on LiteIndexHeader. - ASSERT_THAT(lite_index->PersistToDisk(), IsOk()); - ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(), - &header_data, sizeof(header_data), - LiteIndex::kHeaderFileOffset)); - EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4)); + // Check the total size and unsorted size of the hit buffer. Hits should be + // still unsorted after querying LiteIndex because the HitBuffer unsorted size + // is still below the sort threshold, and we've enabled sort_at_indexing. + EXPECT_THAT(lite_index, Pointee(SizeIs(4))); + EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(4)); } TEST_F( @@ -348,7 +331,10 @@ TEST_F( ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit3)); ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit0)); ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc3_hit1)); - // Verify that the HitBuffer has not been sorted. + // Check the total size and unsorted size of the hit buffer. The HitBuffer has + // not been sorted. + EXPECT_THAT(lite_index, Pointee(SizeIs(14))); + EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(14)); EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue()); // We now have the following in the hit buffer: @@ -400,7 +386,10 @@ TEST_F( EXPECT_THAT(hits3[1].document_id(), Eq(0)); EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1)); - // Check that the HitBuffer is sorted after the query call. + // Check the total size and unsorted size of the hit buffer. The HitBuffer + // should be sorted after the query call. + EXPECT_THAT(lite_index, Pointee(SizeIs(14))); + EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(0)); EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse()); } @@ -511,13 +500,11 @@ TEST_F( // AddHit() itself, we need to invoke SortHits() manually. EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue()); lite_index->SortHits(); - // Check that the HitBuffer is sorted. - ASSERT_THAT(lite_index->PersistToDisk(), IsOk()); - LiteIndex_HeaderImpl::HeaderData header_data; - ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(), - &header_data, sizeof(header_data), - LiteIndex::kHeaderFileOffset)); - EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0)); + // Check the total size and unsorted size of the hit buffer. The HitBuffer + // should be sorted after calling SortHits(). + EXPECT_THAT(lite_index, Pointee(SizeIs(8))); + EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(0)); + EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse()); // Add 12 more hits so that sort threshold is exceeded again. ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit0)); @@ -536,6 +523,8 @@ TEST_F( // Adding these hits exceeds the sort threshold. However when sort_at_indexing // is enabled, sorting is done in the string-section-indexing-handler rather // than AddHit() itself. + EXPECT_THAT(lite_index, Pointee(SizeIs(20))); + EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(12)); EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue()); // We now have the following in the hit buffer: @@ -589,15 +578,13 @@ TEST_F( EXPECT_THAT(hits3[1].document_id(), Eq(0)); EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1)); - // Check that the HitBuffer is sorted after the query call. FetchHits should - // sort before performing binary search if the HitBuffer unsorted size exceeds - // the sort threshold. Regardless of the sort_at_indexing config. + // Check the total size and unsorted size of the hit buffer. FetchHits should + // sort before performing search if the HitBuffer unsorted size exceeds the + // sort threshold, regardless of the sort_at_indexing config (to avoid + // sequential search on an extremely long unsorted tails). + EXPECT_THAT(lite_index, Pointee(SizeIs(20))); + EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(0)); EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse()); - ASSERT_THAT(lite_index->PersistToDisk(), IsOk()); - ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(), - &header_data, sizeof(header_data), - LiteIndex::kHeaderFileOffset)); - EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0)); } TEST_F(LiteIndexTest, LiteIndexIterator) { @@ -736,6 +723,53 @@ TEST_F(LiteIndexTest, LiteIndexIterator_sortAtIndexingDisabled) { term, expected_section_ids_tf_map0))); } +TEST_F(LiteIndexTest, LiteIndexHitBufferSize) { + // Set up LiteIndex and TermIdCodec + std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index"; + // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs. + LiteIndex::Options options(lite_index_file_name, + /*hit_buffer_want_merge_bytes=*/1024 * 1024, + /*hit_buffer_sort_at_indexing=*/true, + /*hit_buffer_sort_threshold_bytes=*/64); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index, + LiteIndex::Create(options, &icing_filesystem_)); + ICING_ASSERT_OK_AND_ASSIGN( + term_id_codec_, + TermIdCodec::Create( + IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()), + IcingDynamicTrie::max_value_index(options.lexicon_options))); + + const std::string term = "foo"; + ICING_ASSERT_OK_AND_ASSIGN( + uint32_t tvi, + lite_index->InsertTerm(term, TermMatchType::PREFIX, kNamespace0)); + ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id, + term_id_codec_->EncodeTvi(tvi, TviType::LITE)); + Hit hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/3, + /*is_in_prefix_section=*/false); + Hit hit1(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/5, + /*is_in_prefix_section=*/false); + ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, hit0)); + ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, hit1)); + + // Check the total size and byte size of the hit buffer. + EXPECT_THAT(lite_index, Pointee(SizeIs(2))); + EXPECT_THAT(lite_index->GetHitBufferByteSize(), + Eq(2 * sizeof(TermIdHitPair::Value))); + // Check the unsorted size and byte size of the hit buffer. + EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(2)); + EXPECT_THAT(lite_index->GetHitBufferUnsortedByteSize(), + Eq(2 * sizeof(TermIdHitPair::Value))); + + // Sort the hit buffer and check again. + lite_index->SortHits(); + EXPECT_THAT(lite_index, Pointee(SizeIs(2))); + EXPECT_THAT(lite_index->GetHitBufferByteSize(), + Eq(2 * sizeof(TermIdHitPair::Value))); + EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(0)); + EXPECT_THAT(lite_index->GetHitBufferUnsortedByteSize(), Eq(0)); +} + } // namespace } // namespace lib } // namespace icing diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc index bbfbf3c..74e51cf 100644 --- a/icing/query/query-processor.cc +++ b/icing/query/query-processor.cc @@ -14,6 +14,7 @@ #include "icing/query/query-processor.h" +#include <cstdint> #include <deque> #include <memory> #include <stack> @@ -34,13 +35,14 @@ #include "icing/index/iterator/doc-hit-info-iterator-or.h" #include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h" #include "icing/index/iterator/doc-hit-info-iterator.h" +#include "icing/index/numeric/numeric-index.h" +#include "icing/proto/logging.pb.h" #include "icing/proto/search.pb.h" #include "icing/query/advanced_query_parser/abstract-syntax-tree.h" #include "icing/query/advanced_query_parser/lexer.h" #include "icing/query/advanced_query_parser/parser.h" #include "icing/query/advanced_query_parser/query-visitor.h" #include "icing/query/query-features.h" -#include "icing/query/query-processor.h" #include "icing/query/query-results.h" #include "icing/query/query-terms.h" #include "icing/query/query-utils.h" @@ -49,11 +51,11 @@ #include "icing/store/document-id.h" #include "icing/store/document-store.h" #include "icing/tokenization/language-segmenter.h" -#include "icing/tokenization/raw-query-tokenizer.h" #include "icing/tokenization/token.h" #include "icing/tokenization/tokenizer-factory.h" #include "icing/tokenization/tokenizer.h" #include "icing/transform/normalizer.h" +#include "icing/util/clock.h" #include "icing/util/status-macros.h" namespace icing { @@ -112,17 +114,18 @@ QueryProcessor::Create(Index* index, const NumericIndex<int64_t>* numeric_index, const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, const DocumentStore* document_store, - const SchemaStore* schema_store) { + const SchemaStore* schema_store, const Clock* clock) { ICING_RETURN_ERROR_IF_NULL(index); ICING_RETURN_ERROR_IF_NULL(numeric_index); ICING_RETURN_ERROR_IF_NULL(language_segmenter); ICING_RETURN_ERROR_IF_NULL(normalizer); ICING_RETURN_ERROR_IF_NULL(document_store); ICING_RETURN_ERROR_IF_NULL(schema_store); + ICING_RETURN_ERROR_IF_NULL(clock); return std::unique_ptr<QueryProcessor>( new QueryProcessor(index, numeric_index, language_segmenter, normalizer, - document_store, schema_store)); + document_store, schema_store, clock)); } QueryProcessor::QueryProcessor(Index* index, @@ -130,18 +133,20 @@ QueryProcessor::QueryProcessor(Index* index, const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, const DocumentStore* document_store, - const SchemaStore* schema_store) + const SchemaStore* schema_store, + const Clock* clock) : index_(*index), numeric_index_(*numeric_index), language_segmenter_(*language_segmenter), normalizer_(*normalizer), document_store_(*document_store), - schema_store_(*schema_store) {} + schema_store_(*schema_store), + clock_(*clock) {} libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch( const SearchSpecProto& search_spec, ScoringSpecProto::RankingStrategy::Code ranking_strategy, - int64_t current_time_ms) { + int64_t current_time_ms, QueryStatsProto::SearchStats* search_stats) { if (search_spec.search_type() == SearchSpecProto::SearchType::UNDEFINED) { return absl_ports::InvalidArgumentError(absl_ports::StrCat( "Search type ", @@ -152,9 +157,9 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch( if (search_spec.search_type() == SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) { ICING_VLOG(1) << "Using EXPERIMENTAL_ICING_ADVANCED_QUERY parser!"; - ICING_ASSIGN_OR_RETURN( - results, - ParseAdvancedQuery(search_spec, ranking_strategy, current_time_ms)); + ICING_ASSIGN_OR_RETURN(results, + ParseAdvancedQuery(search_spec, ranking_strategy, + current_time_ms, search_stats)); } else { ICING_ASSIGN_OR_RETURN( results, ParseRawQuery(search_spec, ranking_strategy, current_time_ms)); @@ -188,17 +193,27 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch( libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery( const SearchSpecProto& search_spec, ScoringSpecProto::RankingStrategy::Code ranking_strategy, - int64_t current_time_ms) const { - QueryResults results; + int64_t current_time_ms, QueryStatsProto::SearchStats* search_stats) const { + std::unique_ptr<Timer> lexer_timer = clock_.GetNewTimer(); Lexer lexer(search_spec.query(), Lexer::Language::QUERY); ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens, lexer.ExtractTokens()); + if (search_stats != nullptr) { + search_stats->set_query_processor_lexer_extract_token_latency_ms( + lexer_timer->GetElapsedMilliseconds()); + } + std::unique_ptr<Timer> parser_timer = clock_.GetNewTimer(); Parser parser = Parser::Create(std::move(lexer_tokens)); ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> tree_root, parser.ConsumeQuery()); + if (search_stats != nullptr) { + search_stats->set_query_processor_parser_consume_query_latency_ms( + parser_timer->GetElapsedMilliseconds()); + } if (tree_root == nullptr) { + QueryResults results; results.root_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>( document_store_.last_added_document_id()); return results; @@ -210,13 +225,22 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery( DocHitInfoIteratorFilter::Options options = GetFilterOptions(search_spec); bool needs_term_frequency_info = ranking_strategy == ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE; + + std::unique_ptr<Timer> query_visitor_timer = clock_.GetNewTimer(); QueryVisitor query_visitor(&index_, &numeric_index_, &document_store_, &schema_store_, &normalizer_, plain_tokenizer.get(), search_spec.query(), std::move(options), search_spec.term_match_type(), needs_term_frequency_info, current_time_ms); tree_root->Accept(&query_visitor); - return std::move(query_visitor).ConsumeResults(); + ICING_ASSIGN_OR_RETURN(QueryResults results, + std::move(query_visitor).ConsumeResults()); + if (search_stats != nullptr) { + search_stats->set_query_processor_query_visitor_latency_ms( + query_visitor_timer->GetElapsedMilliseconds()); + } + + return results; } // TODO(cassiewang): Collect query stats to populate the SearchResultsProto diff --git a/icing/query/query-processor.h b/icing/query/query-processor.h index d4c22dd..de256ee 100644 --- a/icing/query/query-processor.h +++ b/icing/query/query-processor.h @@ -20,16 +20,15 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/index/index.h" -#include "icing/index/iterator/doc-hit-info-iterator-filter.h" -#include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/index/numeric/numeric-index.h" +#include "icing/proto/logging.pb.h" #include "icing/proto/search.pb.h" #include "icing/query/query-results.h" -#include "icing/query/query-terms.h" #include "icing/schema/schema-store.h" #include "icing/store/document-store.h" #include "icing/tokenization/language-segmenter.h" #include "icing/transform/normalizer.h" +#include "icing/util/clock.h" namespace icing { namespace lib { @@ -49,7 +48,8 @@ class QueryProcessor { static libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>> Create( Index* index, const NumericIndex<int64_t>* numeric_index, const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, - const DocumentStore* document_store, const SchemaStore* schema_store); + const DocumentStore* document_store, const SchemaStore* schema_store, + const Clock* clock); // Parse the search configurations (including the query, any additional // filters, etc.) in the SearchSpecProto into one DocHitInfoIterator. @@ -68,7 +68,8 @@ class QueryProcessor { libtextclassifier3::StatusOr<QueryResults> ParseSearch( const SearchSpecProto& search_spec, ScoringSpecProto::RankingStrategy::Code ranking_strategy, - int64_t current_time_ms); + int64_t current_time_ms, + QueryStatsProto::SearchStats* search_stats = nullptr); private: explicit QueryProcessor(Index* index, @@ -76,7 +77,7 @@ class QueryProcessor { const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, const DocumentStore* document_store, - const SchemaStore* schema_store); + const SchemaStore* schema_store, const Clock* clock); // Parse the query into a one DocHitInfoIterator that represents the root of a // query tree in our new Advanced Query Language. @@ -88,7 +89,8 @@ class QueryProcessor { libtextclassifier3::StatusOr<QueryResults> ParseAdvancedQuery( const SearchSpecProto& search_spec, ScoringSpecProto::RankingStrategy::Code ranking_strategy, - int64_t current_time_ms) const; + int64_t current_time_ms, + QueryStatsProto::SearchStats* search_stats) const; // Parse the query into a one DocHitInfoIterator that represents the root of a // query tree. @@ -106,12 +108,13 @@ class QueryProcessor { // Not const because we could modify/sort the hit buffer in the lite index at // query time. - Index& index_; - const NumericIndex<int64_t>& numeric_index_; - const LanguageSegmenter& language_segmenter_; - const Normalizer& normalizer_; - const DocumentStore& document_store_; - const SchemaStore& schema_store_; + Index& index_; // Does not own. + const NumericIndex<int64_t>& numeric_index_; // Does not own. + const LanguageSegmenter& language_segmenter_; // Does not own. + const Normalizer& normalizer_; // Does not own. + const DocumentStore& document_store_; // Does not own. + const SchemaStore& schema_store_; // Does not own. + const Clock& clock_; // Does not own. }; } // namespace lib diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc index 025e8e6..3be74fd 100644 --- a/icing/query/query-processor_benchmark.cc +++ b/icing/query/query-processor_benchmark.cc @@ -173,7 +173,7 @@ void BM_QueryOneTerm(benchmark::State& state) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index.get(), numeric_index.get(), language_segmenter.get(), normalizer.get(), - document_store.get(), schema_store.get())); + document_store.get(), schema_store.get(), &clock)); SearchSpecProto search_spec; search_spec.set_query(input_string); @@ -316,7 +316,7 @@ void BM_QueryFiveTerms(benchmark::State& state) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index.get(), numeric_index.get(), language_segmenter.get(), normalizer.get(), - document_store.get(), schema_store.get())); + document_store.get(), schema_store.get(), &clock)); const std::string query_string = absl_ports::StrCat( input_string_a, " ", input_string_b, " ", input_string_c, " ", @@ -452,7 +452,7 @@ void BM_QueryDiacriticTerm(benchmark::State& state) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index.get(), numeric_index.get(), language_segmenter.get(), normalizer.get(), - document_store.get(), schema_store.get())); + document_store.get(), schema_store.get(), &clock)); SearchSpecProto search_spec; search_spec.set_query(input_string); @@ -584,7 +584,7 @@ void BM_QueryHiragana(benchmark::State& state) { std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(index.get(), numeric_index.get(), language_segmenter.get(), normalizer.get(), - document_store.get(), schema_store.get())); + document_store.get(), schema_store.get(), &clock)); SearchSpecProto search_spec; search_spec.set_query(input_string); diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc index 53e3035..43c9629 100644 --- a/icing/query/query-processor_test.cc +++ b/icing/query/query-processor_test.cc @@ -33,6 +33,7 @@ #include "icing/jni/jni-cache.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/portable/platform.h" +#include "icing/proto/logging.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/search.pb.h" #include "icing/proto/term.pb.h" @@ -61,6 +62,7 @@ namespace lib { namespace { using ::testing::ElementsAre; +using ::testing::Eq; using ::testing::IsEmpty; using ::testing::SizeIs; using ::testing::UnorderedElementsAre; @@ -138,7 +140,8 @@ class QueryProcessorTest query_processor_, QueryProcessor::Create(index_.get(), numeric_index_.get(), language_segmenter_.get(), normalizer_.get(), - document_store_.get(), schema_store_.get())); + document_store_.get(), schema_store_.get(), + &fake_clock_)); } libtextclassifier3::Status AddTokenToIndex( @@ -188,35 +191,41 @@ class QueryProcessorTest }; TEST_P(QueryProcessorTest, CreationWithNullPointerShouldFail) { - EXPECT_THAT( - QueryProcessor::Create(/*index=*/nullptr, numeric_index_.get(), - language_segmenter_.get(), normalizer_.get(), - document_store_.get(), schema_store_.get()), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT( - QueryProcessor::Create(index_.get(), /*numeric_index_=*/nullptr, - language_segmenter_.get(), normalizer_.get(), - document_store_.get(), schema_store_.get()), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT( - QueryProcessor::Create(index_.get(), numeric_index_.get(), - /*language_segmenter=*/nullptr, normalizer_.get(), - document_store_.get(), schema_store_.get()), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT( - QueryProcessor::Create( - index_.get(), numeric_index_.get(), language_segmenter_.get(), - /*normalizer=*/nullptr, document_store_.get(), schema_store_.get()), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(QueryProcessor::Create(/*index=*/nullptr, numeric_index_.get(), + language_segmenter_.get(), + normalizer_.get(), document_store_.get(), + schema_store_.get(), &fake_clock_), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(QueryProcessor::Create(index_.get(), /*numeric_index_=*/nullptr, + language_segmenter_.get(), + normalizer_.get(), document_store_.get(), + schema_store_.get(), &fake_clock_), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(QueryProcessor::Create(index_.get(), numeric_index_.get(), + /*language_segmenter=*/nullptr, + normalizer_.get(), document_store_.get(), + schema_store_.get(), &fake_clock_), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(QueryProcessor::Create( + index_.get(), numeric_index_.get(), language_segmenter_.get(), + /*normalizer=*/nullptr, document_store_.get(), + schema_store_.get(), &fake_clock_), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT( QueryProcessor::Create(index_.get(), numeric_index_.get(), language_segmenter_.get(), normalizer_.get(), - /*document_store=*/nullptr, schema_store_.get()), + /*document_store=*/nullptr, schema_store_.get(), + &fake_clock_), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(QueryProcessor::Create(index_.get(), numeric_index_.get(), language_segmenter_.get(), normalizer_.get(), document_store_.get(), - /*schema_store=*/nullptr), + /*schema_store=*/nullptr, &fake_clock_), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(QueryProcessor::Create(index_.get(), numeric_index_.get(), + language_segmenter_.get(), + normalizer_.get(), document_store_.get(), + schema_store_.get(), /*clock=*/nullptr), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } @@ -2948,7 +2957,8 @@ TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) { std::unique_ptr<QueryProcessor> local_query_processor, QueryProcessor::Create(index_.get(), numeric_index_.get(), language_segmenter_.get(), normalizer_.get(), - document_store_.get(), schema_store_.get())); + document_store_.get(), schema_store_.get(), + &fake_clock_)); SearchSpecProto search_spec; search_spec.set_query("hello"); @@ -3010,7 +3020,8 @@ TEST_P(QueryProcessorTest, DocumentPastTtlFilteredOut) { std::unique_ptr<QueryProcessor> local_query_processor, QueryProcessor::Create(index_.get(), numeric_index_.get(), language_segmenter_.get(), normalizer_.get(), - document_store_.get(), schema_store_.get())); + document_store_.get(), schema_store_.get(), + &fake_clock_)); SearchSpecProto search_spec; search_spec.set_query("hello"); @@ -3320,6 +3331,65 @@ TEST_P(QueryProcessorTest, GroupingInSectionRestriction) { std::vector<SectionId>{prop1_section_id}))); } +TEST_P(QueryProcessorTest, ParseAdvancedQueryShouldSetSearchStats) { + if (GetParam() != + SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) { + GTEST_SKIP(); + } + + // Create the schema and document store + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); + + // These documents don't actually match to the tokens in the index. We're + // inserting the documents to get the appropriate number of documents and + // namespaces populated. + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + document_store_->Put(DocumentBuilder() + .SetKey("namespace1", "1") + .SetSchema("email") + .Build())); + + // Populate the index + SectionId section_id = 0; + TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY; + + EXPECT_THAT( + AddTokenToIndex(document_id, section_id, term_match_type, "hello"), + IsOk()); + EXPECT_THAT( + AddTokenToIndex(document_id, section_id, term_match_type, "world"), + IsOk()); + + SearchSpecProto search_spec; + search_spec.set_query("hello world"); + search_spec.set_term_match_type(term_match_type); + search_spec.set_search_type(GetParam()); + + static constexpr int64_t kSearchStatsLatencyMs = 10; + fake_clock_.SetTimerElapsedMilliseconds(kSearchStatsLatencyMs); + + QueryStatsProto::SearchStats search_stats; + ICING_ASSERT_OK_AND_ASSIGN( + QueryResults results, + query_processor_->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, + fake_clock_.GetSystemTimeMilliseconds(), &search_stats)); + + ASSERT_THAT(results.root_iterator->Advance(), IsOk()); + EXPECT_THAT(search_stats.query_processor_lexer_extract_token_latency_ms(), + Eq(kSearchStatsLatencyMs)); + EXPECT_THAT(search_stats.query_processor_parser_consume_query_latency_ms(), + Eq(kSearchStatsLatencyMs)); + EXPECT_THAT(search_stats.query_processor_query_visitor_latency_ms(), + Eq(kSearchStatsLatencyMs)); +} + INSTANTIATE_TEST_SUITE_P( QueryProcessorTest, QueryProcessorTest, testing::Values( diff --git a/icing/query/suggestion-processor.cc b/icing/query/suggestion-processor.cc index eb86e3b..2d73c6b 100644 --- a/icing/query/suggestion-processor.cc +++ b/icing/query/suggestion-processor.cc @@ -14,14 +14,34 @@ #include "icing/query/suggestion-processor.h" -#include "icing/proto/schema.pb.h" +#include <cstdint> +#include <memory> +#include <string> +#include <string_view> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" +#include "icing/index/index.h" +#include "icing/index/numeric/numeric-index.h" +#include "icing/index/term-metadata.h" #include "icing/proto/search.pb.h" #include "icing/query/query-processor.h" +#include "icing/schema/schema-store.h" +#include "icing/schema/section.h" +#include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" +#include "icing/store/document-store.h" +#include "icing/store/namespace-id.h" #include "icing/store/suggestion-result-checker-impl.h" -#include "icing/tokenization/tokenizer-factory.h" -#include "icing/tokenization/tokenizer.h" +#include "icing/tokenization/language-segmenter.h" #include "icing/transform/normalizer.h" +#include "icing/util/clock.h" +#include "icing/util/status-macros.h" namespace icing { namespace lib { @@ -32,17 +52,19 @@ SuggestionProcessor::Create(Index* index, const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, const DocumentStore* document_store, - const SchemaStore* schema_store) { + const SchemaStore* schema_store, + const Clock* clock) { ICING_RETURN_ERROR_IF_NULL(index); ICING_RETURN_ERROR_IF_NULL(numeric_index); ICING_RETURN_ERROR_IF_NULL(language_segmenter); ICING_RETURN_ERROR_IF_NULL(normalizer); ICING_RETURN_ERROR_IF_NULL(document_store); ICING_RETURN_ERROR_IF_NULL(schema_store); + ICING_RETURN_ERROR_IF_NULL(clock); return std::unique_ptr<SuggestionProcessor>( new SuggestionProcessor(index, numeric_index, language_segmenter, - normalizer, document_store, schema_store)); + normalizer, document_store, schema_store, clock)); } libtextclassifier3::StatusOr< @@ -225,7 +247,8 @@ SuggestionProcessor::QuerySuggestions( ICING_ASSIGN_OR_RETURN( std::unique_ptr<QueryProcessor> query_processor, QueryProcessor::Create(&index_, &numeric_index_, &language_segmenter_, - &normalizer_, &document_store_, &schema_store_)); + &normalizer_, &document_store_, &schema_store_, + &clock_)); SearchSpecProto search_spec; search_spec.set_query(suggestion_spec.prefix()); @@ -299,13 +322,15 @@ SuggestionProcessor::QuerySuggestions( SuggestionProcessor::SuggestionProcessor( Index* index, const NumericIndex<int64_t>* numeric_index, const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, - const DocumentStore* document_store, const SchemaStore* schema_store) + const DocumentStore* document_store, const SchemaStore* schema_store, + const Clock* clock) : index_(*index), numeric_index_(*numeric_index), language_segmenter_(*language_segmenter), normalizer_(*normalizer), document_store_(*document_store), - schema_store_(*schema_store) {} + schema_store_(*schema_store), + clock_(*clock) {} } // namespace lib } // namespace icing diff --git a/icing/query/suggestion-processor.h b/icing/query/suggestion-processor.h index e100031..fe1437c 100644 --- a/icing/query/suggestion-processor.h +++ b/icing/query/suggestion-processor.h @@ -15,6 +15,10 @@ #ifndef ICING_QUERY_SUGGESTION_PROCESSOR_H_ #define ICING_QUERY_SUGGESTION_PROCESSOR_H_ +#include <cstdint> +#include <memory> +#include <vector> + #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/index/index.h" #include "icing/index/numeric/numeric-index.h" @@ -23,6 +27,7 @@ #include "icing/store/document-store.h" #include "icing/tokenization/language-segmenter.h" #include "icing/transform/normalizer.h" +#include "icing/util/clock.h" namespace icing { namespace lib { @@ -43,7 +48,7 @@ class SuggestionProcessor { Create(Index* index, const NumericIndex<int64_t>* numeric_index, const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, const DocumentStore* document_store, - const SchemaStore* schema_store); + const SchemaStore* schema_store, const Clock* clock); // Query suggestions based on the given SuggestionSpecProto. // @@ -60,16 +65,18 @@ class SuggestionProcessor { const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, const DocumentStore* document_store, - const SchemaStore* schema_store); + const SchemaStore* schema_store, + const Clock* clock); // Not const because we could modify/sort the TermMetaData buffer in the lite // index. - Index& index_; - const NumericIndex<int64_t>& numeric_index_; - const LanguageSegmenter& language_segmenter_; - const Normalizer& normalizer_; - const DocumentStore& document_store_; - const SchemaStore& schema_store_; + Index& index_; // Does not own. + const NumericIndex<int64_t>& numeric_index_; // Does not own. + const LanguageSegmenter& language_segmenter_; // Does not own. + const Normalizer& normalizer_; // Does not own. + const DocumentStore& document_store_; // Does not own. + const SchemaStore& schema_store_; // Does not own. + const Clock& clock_; // Does not own. }; } // namespace lib diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc index 9f9094d..4c5e4ac 100644 --- a/icing/query/suggestion-processor_test.cc +++ b/icing/query/suggestion-processor_test.cc @@ -117,9 +117,10 @@ class SuggestionProcessorTest : public Test { ICING_ASSERT_OK_AND_ASSIGN( suggestion_processor_, - SuggestionProcessor::Create( - index_.get(), numeric_index_.get(), language_segmenter_.get(), - normalizer_.get(), document_store_.get(), schema_store_.get())); + SuggestionProcessor::Create(index_.get(), numeric_index_.get(), + language_segmenter_.get(), + normalizer_.get(), document_store_.get(), + schema_store_.get(), &fake_clock_)); } libtextclassifier3::Status AddTokenToIndex( diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc index e17e388..6830787 100644 --- a/icing/schema/schema-store.cc +++ b/icing/schema/schema-store.cc @@ -486,7 +486,7 @@ libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles( ICING_RETURN_IF_ERROR(schema_file_->Write(std::move(base_schema_ptr))); // LINT.IfChange(min_overlay_version_compatibility) - // Although the current version is 3, the schema is compatible with + // Although the current version is 4, the schema is compatible with // version 1, so min_overlay_version_compatibility should be 1. int32_t min_overlay_version_compatibility = version_util::kVersionOne; // LINT.ThenChange(//depot/google3/icing/file/version-util.h:kVersion) diff --git a/icing/text_classifier/lib3/utils/base/logging.h b/icing/text_classifier/lib3/utils/base/logging.h index 92d775e..39e09eb 100644 --- a/icing/text_classifier/lib3/utils/base/logging.h +++ b/icing/text_classifier/lib3/utils/base/logging.h @@ -16,9 +16,9 @@ #define ICING_TEXT_CLASSIFIER_LIB3_UTILS_BASE_LOGGING_H_ #include <cassert> +#include <cstdint> #include <string> -#include "icing/text_classifier/lib3/utils/base/integral_types.h" #include "icing/text_classifier/lib3/utils/base/logging_levels.h" #include "icing/text_classifier/lib3/utils/base/port.h" @@ -45,7 +45,8 @@ inline LoggingStringStream& operator<<(LoggingStringStream& stream, template <typename T> inline LoggingStringStream& operator<<(LoggingStringStream& stream, T* const entry) { - stream.message.append(std::to_string(reinterpret_cast<const uint64>(entry))); + stream.message.append( + std::to_string(reinterpret_cast<const uint64_t>(entry))); return stream; } diff --git a/icing/text_classifier/lib3/utils/java/jni-helper.cc b/icing/text_classifier/lib3/utils/java/jni-helper.cc index 60a9dfb..cb0b899 100644 --- a/icing/text_classifier/lib3/utils/java/jni-helper.cc +++ b/icing/text_classifier/lib3/utils/java/jni-helper.cc @@ -14,6 +14,8 @@ #include "icing/text_classifier/lib3/utils/java/jni-helper.h" +#include <cstdint> + #include "icing/text_classifier/lib3/utils/base/status_macros.h" namespace libtextclassifier3 { @@ -121,8 +123,8 @@ StatusOr<bool> JniHelper::CallBooleanMethod(JNIEnv* env, jobject object, return result; } -StatusOr<int32> JniHelper::CallIntMethod(JNIEnv* env, jobject object, - jmethodID method_id, ...) { +StatusOr<int32_t> JniHelper::CallIntMethod(JNIEnv* env, jobject object, + jmethodID method_id, ...) { va_list args; va_start(args, method_id); jint result = env->CallIntMethodV(object, method_id, args); @@ -132,8 +134,8 @@ StatusOr<int32> JniHelper::CallIntMethod(JNIEnv* env, jobject object, return result; } -StatusOr<int64> JniHelper::CallLongMethod(JNIEnv* env, jobject object, - jmethodID method_id, ...) { +StatusOr<int64_t> JniHelper::CallLongMethod(JNIEnv* env, jobject object, + jmethodID method_id, ...) { va_list args; va_start(args, method_id); jlong result = env->CallLongMethodV(object, method_id, args); diff --git a/icing/text_classifier/lib3/utils/java/jni-helper.h b/icing/text_classifier/lib3/utils/java/jni-helper.h index 4e548ec..8b57c11 100644 --- a/icing/text_classifier/lib3/utils/java/jni-helper.h +++ b/icing/text_classifier/lib3/utils/java/jni-helper.h @@ -20,6 +20,7 @@ #include <jni.h> +#include <cstdint> #include <string> #include "icing/text_classifier/lib3/utils/base/status.h" @@ -140,10 +141,10 @@ class JniHelper { ...); static StatusOr<bool> CallBooleanMethod(JNIEnv* env, jobject object, jmethodID method_id, ...); - static StatusOr<int32> CallIntMethod(JNIEnv* env, jobject object, - jmethodID method_id, ...); - static StatusOr<int64> CallLongMethod(JNIEnv* env, jobject object, - jmethodID method_id, ...); + static StatusOr<int32_t> CallIntMethod(JNIEnv* env, jobject object, + jmethodID method_id, ...); + static StatusOr<int64_t> CallLongMethod(JNIEnv* env, jobject object, + jmethodID method_id, ...); static StatusOr<float> CallFloatMethod(JNIEnv* env, jobject object, jmethodID method_id, ...); static StatusOr<double> CallDoubleMethod(JNIEnv* env, jobject object, diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java index 47b94a5..a8a571e 100644 --- a/java/src/com/google/android/icing/IcingSearchEngine.java +++ b/java/src/com/google/android/icing/IcingSearchEngine.java @@ -77,12 +77,6 @@ public class IcingSearchEngine implements IcingSearchEngineInterface { icingSearchEngineImpl.close(); } - @Override - protected void finalize() throws Throwable { - icingSearchEngineImpl.close(); - super.finalize(); - } - @NonNull @Override public InitializeResultProto initialize() { diff --git a/java/src/com/google/android/icing/IcingSearchEngineImpl.java b/java/src/com/google/android/icing/IcingSearchEngineImpl.java index 8e79a88..7994162 100644 --- a/java/src/com/google/android/icing/IcingSearchEngineImpl.java +++ b/java/src/com/google/android/icing/IcingSearchEngineImpl.java @@ -71,12 +71,6 @@ public class IcingSearchEngineImpl implements Closeable { closed = true; } - @Override - protected void finalize() throws Throwable { - close(); - super.finalize(); - } - @Nullable public byte[] initialize() { throwIfClosed(); diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto index 9dd9e88..7c58d2c 100644 --- a/proto/icing/proto/initialize.proto +++ b/proto/icing/proto/initialize.proto @@ -23,6 +23,56 @@ option java_package = "com.google.android.icing.proto"; option java_multiple_files = true; option objc_class_prefix = "ICNG"; +// Next tag: 7 +message IcingSearchEngineFeatureInfoProto { + // REQUIRED: Enum representing an IcingLib feature flagged using + // IcingSearchEngineOptions + optional FlaggedFeatureType feature_type = 1; + + enum FlaggedFeatureType { + // This value should never purposely be used. This is used for backwards + // compatibility reasons. + UNKNOWN = 0; + + // Feature for flag + // IcingSearchEngineOptions::build_property_existence_metadata_hits. + // + // This feature covers the kHasPropertyFunctionFeature advanced query + // feature, and related metadata hits indexing used for property existence + // check. + FEATURE_HAS_PROPERTY_OPERATOR = 1; + } + + // Whether the feature requires the document store to be rebuilt. + // The default value is false. + optional bool needs_document_store_rebuild = 2; + + // Whether the feature requires the schema store to be rebuilt. + // The default value is false. + optional bool needs_schema_store_rebuild = 3; + + // Whether the feature requires the term index to be rebuilt. + // The default value is false. + optional bool needs_term_index_rebuild = 4; + + // Whether the feature requires the integer index to be rebuilt. + // The default value is false. + optional bool needs_integer_index_rebuild = 5; + + // Whether the feature requires the qualified id join index to be rebuilt. + // The default value is false. + optional bool needs_qualified_id_join_index_rebuild = 6; +} + +// Next tag: 4 +message IcingSearchEngineVersionProto { + // version and max_version are from the original version file. + optional int32 version = 1; + optional int32 max_version = 2; + // Features that are enabled in an icing version at initialization. + repeated IcingSearchEngineFeatureInfoProto enabled_features = 3; +} + // Next tag: 16 message IcingSearchEngineOptions { // Directory to persist files for Icing. Required. @@ -131,9 +181,6 @@ message IcingSearchEngineOptions { // Whether to build the metadata hits used for property existence check, which // is required to support the hasProperty function in advanced query. - // - // TODO(b/309826655): Implement the feature flag derived files rebuild - // mechanism to handle index rebuild, instead of using index's magic value. optional bool build_property_existence_metadata_hits = 15; reserved 2; diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto index fcedeed..4854521 100644 --- a/proto/icing/proto/logging.proto +++ b/proto/icing/proto/logging.proto @@ -55,6 +55,10 @@ message InitializeStatsProto { // Any dependencies have changed. DEPENDENCIES_CHANGED = 7; + + // Change detected in Icing's feature flags since last initialization that + // requires recovery. + FEATURE_FLAG_CHANGED = 8; } // Possible recovery causes for document store: @@ -170,7 +174,7 @@ message PutDocumentStatsProto { // Stats of the top-level function IcingSearchEngine::Search() and // IcingSearchEngine::GetNextPage(). -// Next tag: 26 +// Next tag: 28 message QueryStatsProto { // TODO(b/305098009): deprecate. Use parent_search_stats instead. // The UTF-8 length of the query string @@ -252,7 +256,7 @@ message QueryStatsProto { optional bool is_join_query = 23; // Stats of the search. Only valid for first page. - // Next tag: 13 + // Next tag: 16 message SearchStats { // The UTF-8 length of the query string optional int32 query_length = 1; @@ -290,6 +294,15 @@ message QueryStatsProto { // Number of hits fetched by integer index before applying any filters. optional int32 num_fetched_hits_integer_index = 12; + + // Time used in Lexer to extract lexer tokens from the query. + optional int32 query_processor_lexer_extract_token_latency_ms = 13; + + // Time used in Parser to consume lexer tokens extracted from the query. + optional int32 query_processor_parser_consume_query_latency_ms = 14; + + // Time used in QueryVisitor to visit and build (nested) DocHitInfoIterator. + optional int32 query_processor_query_visitor_latency_ms = 15; } // Search stats for parent. Only valid for first page. @@ -298,6 +311,12 @@ message QueryStatsProto { // Search stats for child. optional SearchStats child_search_stats = 25; + // Byte size of the lite index hit buffer. + optional int64 lite_index_hit_buffer_byte_size = 26; + + // Byte size of the unsorted tail of the lite index hit buffer. + optional int64 lite_index_hit_buffer_unsorted_byte_size = 27; + reserved 9; } diff --git a/proto/icing/proto/storage.proto b/proto/icing/proto/storage.proto index 39dab6b..e0323a1 100644 --- a/proto/icing/proto/storage.proto +++ b/proto/icing/proto/storage.proto @@ -22,6 +22,8 @@ option java_package = "com.google.android.icing.proto"; option java_multiple_files = true; option objc_class_prefix = "ICNG"; +// TODO(b/305098009): fix byte size vs size naming issue. + // Next tag: 10 message NamespaceStorageInfoProto { // Name of the namespace diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt index dd08fd1..a36a007 100644 --- a/synced_AOSP_CL_number.txt +++ b/synced_AOSP_CL_number.txt @@ -1 +1 @@ -set(synced_AOSP_CL_number=587883838) +set(synced_AOSP_CL_number=596096845) |