aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGrace Zhao <gracezrx@google.com>2024-01-05 15:33:48 -0800
committerGrace Zhao <gracezrx@google.com>2024-01-05 23:44:48 +0000
commit030b99d2648aea03e5c2e3809a13fdfacdccde11 (patch)
tree0db961d8ad94bbf346f5b5a701d6fa3fb86fe7e1
parenta3e306de05f977e27ae8e9ce840b54fa5b838366 (diff)
downloadicing-030b99d2648aea03e5c2e3809a13fdfacdccde11.tar.gz
Update Icing from upstream.
Descriptions: ======================================================================== [Trunk Stable Flags and Files Rebuild #1] Implement v2 version file in version-util ======================================================================== [Trunk Stable Flags and Files Rebuild #2] Integrate v2 version file with IcingSearchEngine ======================================================================== Remove dead code in index initialization for HasPropertyOperator after introducing v2 version file ======================================================================== [Icing version 4] Bump Icing kVersion to 4 for Android V ======================================================================== [Icing][Expand QueryStats][4/x] Add lite index hit buffer info into QueryStats ======================================================================== [Icing][Expand QueryStats][5/x] Add query processor advanced query components latencies into QueryStats::SearchStats ======================================================================== Add instructions to error message for AppSearch advanced query features backward compatibility ======================================================================== NO_IFTTT="Path is only valid in G3." Bug: 314816301 Bug: 309826655 Bug: 305098009 Change-Id: I6eddda6dee8362aed5c56844b69dd159a715aaec
-rw-r--r--icing/file/version-util.cc263
-rw-r--r--icing/file/version-util.h188
-rw-r--r--icing/file/version-util_test.cc498
-rw-r--r--icing/icing-search-engine.cc168
-rw-r--r--icing/icing-search-engine.h17
-rw-r--r--icing/icing-search-engine_initialization_test.cc134
-rw-r--r--icing/icing-search-engine_search_test.cc41
-rw-r--r--icing/index/index.cc8
-rw-r--r--icing/index/index.h20
-rw-r--r--icing/index/index_test.cc37
-rw-r--r--icing/index/lite/lite-index-header.h22
-rw-r--r--icing/index/lite/lite-index-options.cc12
-rw-r--r--icing/index/lite/lite-index-options.h4
-rw-r--r--icing/index/lite/lite-index.cc16
-rw-r--r--icing/index/lite/lite-index.h28
-rw-r--r--icing/index/lite/lite-index_test.cc136
-rw-r--r--icing/query/query-processor.cc50
-rw-r--r--icing/query/query-processor.h29
-rw-r--r--icing/query/query-processor_benchmark.cc8
-rw-r--r--icing/query/query-processor_test.cc120
-rw-r--r--icing/query/suggestion-processor.cc41
-rw-r--r--icing/query/suggestion-processor.h23
-rw-r--r--icing/query/suggestion-processor_test.cc7
-rw-r--r--icing/schema/schema-store.cc2
-rw-r--r--icing/text_classifier/lib3/utils/base/logging.h5
-rw-r--r--icing/text_classifier/lib3/utils/java/jni-helper.cc10
-rw-r--r--icing/text_classifier/lib3/utils/java/jni-helper.h9
-rw-r--r--java/src/com/google/android/icing/IcingSearchEngine.java6
-rw-r--r--java/src/com/google/android/icing/IcingSearchEngineImpl.java6
-rw-r--r--proto/icing/proto/initialize.proto53
-rw-r--r--proto/icing/proto/logging.proto23
-rw-r--r--proto/icing/proto/storage.proto2
-rw-r--r--synced_AOSP_CL_number.txt2
33 files changed, 1631 insertions, 357 deletions
diff --git a/icing/file/version-util.cc b/icing/file/version-util.cc
index dd233e0..e750b0c 100644
--- a/icing/file/version-util.cc
+++ b/icing/file/version-util.cc
@@ -15,36 +15,47 @@
#include "icing/file/version-util.h"
#include <cstdint>
+#include <memory>
#include <string>
+#include <string_view>
+#include <unordered_set>
#include <utility>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
#include "icing/index/index.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
namespace version_util {
-libtextclassifier3::StatusOr<VersionInfo> ReadVersion(
- const Filesystem& filesystem, const std::string& version_file_path,
+namespace {
+
+libtextclassifier3::StatusOr<VersionInfo> ReadV1VersionInfo(
+ const Filesystem& filesystem, const std::string& version_file_dir,
const std::string& index_base_dir) {
// 1. Read the version info.
+ const std::string v1_version_filepath =
+ MakeVersionFilePath(version_file_dir, kVersionFilenameV1);
VersionInfo existing_version_info(-1, -1);
- if (filesystem.FileExists(version_file_path.c_str()) &&
- !filesystem.PRead(version_file_path.c_str(), &existing_version_info,
+ if (filesystem.FileExists(v1_version_filepath.c_str()) &&
+ !filesystem.PRead(v1_version_filepath.c_str(), &existing_version_info,
sizeof(VersionInfo), /*offset=*/0)) {
- return absl_ports::InternalError("Fail to read version");
+ return absl_ports::InternalError("Failed to read v1 version file");
}
// 2. Check the Index magic to see if we're actually on version 0.
- libtextclassifier3::StatusOr<int> existing_flash_index_magic_or =
+ libtextclassifier3::StatusOr<int> existing_flash_index_magic =
Index::ReadFlashIndexMagic(&filesystem, index_base_dir);
- if (!existing_flash_index_magic_or.ok()) {
- if (absl_ports::IsNotFound(existing_flash_index_magic_or.status())) {
+ if (!existing_flash_index_magic.ok()) {
+ if (absl_ports::IsNotFound(existing_flash_index_magic.status())) {
// Flash index magic doesn't exist. In this case, we're unable to
// determine the version change state correctly (regardless of the
// existence of the version file), so invalidate VersionInfo by setting
@@ -53,9 +64,9 @@ libtextclassifier3::StatusOr<VersionInfo> ReadVersion(
return existing_version_info;
}
// Real error.
- return std::move(existing_flash_index_magic_or).status();
+ return std::move(existing_flash_index_magic).status();
}
- if (existing_flash_index_magic_or.ValueOrDie() ==
+ if (existing_flash_index_magic.ValueOrDie() ==
kVersionZeroFlashIndexMagic) {
existing_version_info.version = 0;
if (existing_version_info.max_version == -1) {
@@ -66,15 +77,124 @@ libtextclassifier3::StatusOr<VersionInfo> ReadVersion(
return existing_version_info;
}
-libtextclassifier3::Status WriteVersion(const Filesystem& filesystem,
- const std::string& version_file_path,
- const VersionInfo& version_info) {
- ScopedFd scoped_fd(filesystem.OpenForWrite(version_file_path.c_str()));
+libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadV2VersionInfo(
+ const Filesystem& filesystem, const std::string& version_file_dir) {
+ // Read the v2 version file. V2 version file stores the
+ // IcingSearchEngineVersionProto as a file-backed proto.
+ const std::string v2_version_filepath =
+ MakeVersionFilePath(version_file_dir, kVersionFilenameV2);
+ FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
+ filesystem, v2_version_filepath);
+ ICING_ASSIGN_OR_RETURN(const IcingSearchEngineVersionProto* v2_version_proto,
+ v2_version_file.Read());
+
+ return *v2_version_proto;
+}
+
+} // namespace
+
+libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadVersion(
+ const Filesystem& filesystem, const std::string& version_file_dir,
+ const std::string& index_base_dir) {
+ // 1. Read the v1 version file
+ ICING_ASSIGN_OR_RETURN(
+ VersionInfo v1_version_info,
+ ReadV1VersionInfo(filesystem, version_file_dir, index_base_dir));
+ if (!v1_version_info.IsValid()) {
+ // This happens if IcingLib's state is invalid (e.g. flash index header file
+ // is missing). Return the invalid version numbers in this case.
+ IcingSearchEngineVersionProto version_proto;
+ version_proto.set_version(v1_version_info.version);
+ version_proto.set_max_version(v1_version_info.max_version);
+ return version_proto;
+ }
+
+ // 2. Read the v2 version file
+ auto v2_version_proto = ReadV2VersionInfo(filesystem, version_file_dir);
+ if (!v2_version_proto.ok()) {
+ if (!absl_ports::IsNotFound(v2_version_proto.status())) {
+ // Real error.
+ return std::move(v2_version_proto).status();
+ }
+ // The v2 version file has not been written
+ IcingSearchEngineVersionProto version_proto;
+ if (v1_version_info.version < kFirstV2Version) {
+ // There are two scenarios for this case:
+ // 1. It's the first time that we're upgrading from a lower version to a
+ // version >= kFirstV2Version.
+ // - It's expected that the v2 version file has not been written yet in
+ // this case and we return the v1 version numbers instead.
+ // 2. We're rolling forward from a version < kFirstV2Version, after
+ // rolling back from a previous version >= kFirstV2Version, and for
+ // some unknown reason we lost the v2 version file in the previous
+ // version.
+ // - e.g. version #4 -> version #1 -> version #4, but we lost the v2
+ // file during version #1.
+ // - This is a rollforward case, but it's still fine to return the v1
+ // version number here as ShouldRebuildDerivedFiles can handle
+ // rollforwards correctly.
+ version_proto.set_version(v1_version_info.version);
+ version_proto.set_max_version(v1_version_info.max_version);
+ } else {
+ // Something weird has happened. During last initialization we were
+ // already on a version >= kFirstV2Version, so the v2 version file
+ // should have been written.
+ // Return an invalid version number in this case and trigger rebuilding
+ // everything.
+ version_proto.set_version(-1);
+ version_proto.set_max_version(v1_version_info.max_version);
+ }
+ return version_proto;
+ }
+
+ // 3. Check if versions match. If not, it means that we're rolling forward
+ // from a version < kFirstV2Version. In order to trigger rebuilding
+ // everything, we return an invalid version number in this case.
+ IcingSearchEngineVersionProto v2_version_proto_value =
+ std::move(v2_version_proto).ValueOrDie();
+ if (v1_version_info.version != v2_version_proto_value.version()) {
+ v2_version_proto_value.set_version(-1);
+ v2_version_proto_value.mutable_enabled_features()->Clear();
+ }
+
+ return v2_version_proto_value;
+}
+
+libtextclassifier3::Status WriteV1Version(const Filesystem& filesystem,
+ const std::string& version_file_dir,
+ const VersionInfo& version_info) {
+ ScopedFd scoped_fd(filesystem.OpenForWrite(
+ MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()));
if (!scoped_fd.is_valid() ||
!filesystem.PWrite(scoped_fd.get(), /*offset=*/0, &version_info,
sizeof(VersionInfo)) ||
!filesystem.DataSync(scoped_fd.get())) {
- return absl_ports::InternalError("Fail to write version");
+ return absl_ports::InternalError("Failed to write v1 version file");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status WriteV2Version(
+ const Filesystem& filesystem, const std::string& version_file_dir,
+ std::unique_ptr<IcingSearchEngineVersionProto> version_proto) {
+ FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
+ filesystem, MakeVersionFilePath(version_file_dir, kVersionFilenameV2));
+ libtextclassifier3::Status v2_write_status =
+ v2_version_file.Write(std::move(version_proto));
+ if (!v2_write_status.ok()) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to write v2 version file: ", v2_write_status.error_message()));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status DiscardVersionFiles(
+ const Filesystem& filesystem, std::string_view version_file_dir) {
+ if (!filesystem.DeleteFile(
+ MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()) ||
+ !filesystem.DeleteFile(
+ MakeVersionFilePath(version_file_dir, kVersionFilenameV2).c_str())) {
+ return absl_ports::InternalError("Failed to discard version files");
}
return libtextclassifier3::Status::OK;
}
@@ -102,6 +222,65 @@ StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
}
}
+DerivedFilesRebuildResult CalculateRequiredDerivedFilesRebuild(
+ const IcingSearchEngineVersionProto& prev_version_proto,
+ const IcingSearchEngineVersionProto& curr_version_proto) {
+ // 1. Do version check using version and max_version numbers
+ if (ShouldRebuildDerivedFiles(GetVersionInfoFromProto(prev_version_proto),
+ curr_version_proto.version())) {
+ return DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/true,
+ /*needs_schema_store_derived_files_rebuild=*/true,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/true,
+ /*needs_qualified_id_join_index_rebuild=*/true);
+ }
+
+ // 2. Compare the previous enabled features with the current enabled features
+ // and rebuild if there are differences.
+ std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
+ prev_features;
+ for (const auto& feature : prev_version_proto.enabled_features()) {
+ prev_features.insert(feature.feature_type());
+ }
+ std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
+ curr_features;
+ for (const auto& feature : curr_version_proto.enabled_features()) {
+ curr_features.insert(feature.feature_type());
+ }
+ DerivedFilesRebuildResult result;
+ for (const auto& prev_feature : prev_features) {
+ // If there is an UNKNOWN feature in the previous feature set (note that we
+ // never use UNKNOWN when writing the version proto), it means that:
+ // - The previous version proto contains a feature enum that is only defined
+ // in a newer version.
+ // - We've now rolled back to an old version that doesn't understand this
+ // new enum value, and proto serialization defaults it to 0 (UNKNOWN).
+ // - In this case we need to rebuild everything.
+ if (prev_feature == IcingSearchEngineFeatureInfoProto::UNKNOWN) {
+ return DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/true,
+ /*needs_schema_store_derived_files_rebuild=*/true,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/true,
+ /*needs_qualified_id_join_index_rebuild=*/true);
+ }
+ if (curr_features.find(prev_feature) == curr_features.end()) {
+ DerivedFilesRebuildResult required_rebuilds =
+ GetFeatureDerivedFilesRebuildResult(prev_feature);
+ result.CombineWithOtherRebuildResultOr(required_rebuilds);
+ }
+ }
+ for (const auto& curr_feature : curr_features) {
+ if (prev_features.find(curr_feature) == prev_features.end()) {
+ DerivedFilesRebuildResult required_rebuilds =
+ GetFeatureDerivedFilesRebuildResult(curr_feature);
+ result.CombineWithOtherRebuildResultOr(required_rebuilds);
+ }
+ }
+ return result;
+}
+
bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
int32_t curr_version) {
StateChange state_change =
@@ -135,6 +314,10 @@ bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
// version 2 -> version 3 upgrade, no need to rebuild
break;
}
+ case 3: {
+ // version 3 -> version 4 upgrade, no need to rebuild
+ break;
+ }
default:
// This should not happen. Rebuild anyway if unsure.
should_rebuild |= true;
@@ -144,6 +327,56 @@ bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
return should_rebuild;
}
+DerivedFilesRebuildResult GetFeatureDerivedFilesRebuildResult(
+ IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
+ switch (feature) {
+ case IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR: {
+ return DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/false,
+ /*needs_schema_store_derived_files_rebuild=*/false,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/false,
+ /*needs_qualified_id_join_index_rebuild=*/false);
+ }
+ case IcingSearchEngineFeatureInfoProto::UNKNOWN:
+ return DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/true,
+ /*needs_schema_store_derived_files_rebuild=*/true,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/true,
+ /*needs_qualified_id_join_index_rebuild=*/true);
+ }
+}
+
+IcingSearchEngineFeatureInfoProto GetFeatureInfoProto(
+ IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
+ IcingSearchEngineFeatureInfoProto info;
+ info.set_feature_type(feature);
+
+ DerivedFilesRebuildResult result =
+ GetFeatureDerivedFilesRebuildResult(feature);
+ info.set_needs_document_store_rebuild(
+ result.needs_document_store_derived_files_rebuild);
+ info.set_needs_schema_store_rebuild(
+ result.needs_schema_store_derived_files_rebuild);
+ info.set_needs_term_index_rebuild(result.needs_term_index_rebuild);
+ info.set_needs_integer_index_rebuild(result.needs_integer_index_rebuild);
+ info.set_needs_qualified_id_join_index_rebuild(
+ result.needs_qualified_id_join_index_rebuild);
+
+ return info;
+}
+
+void AddEnabledFeatures(const IcingSearchEngineOptions& options,
+ IcingSearchEngineVersionProto* version_proto) {
+ auto* enabled_features = version_proto->mutable_enabled_features();
+ // HasPropertyOperator feature
+ if (options.build_property_existence_metadata_hits()) {
+ enabled_features->Add(GetFeatureInfoProto(
+ IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR));
+ }
+}
+
} // namespace version_util
} // namespace lib
diff --git a/icing/file/version-util.h b/icing/file/version-util.h
index b2d51df..feadaf6 100644
--- a/icing/file/version-util.h
+++ b/icing/file/version-util.h
@@ -16,11 +16,15 @@
#define ICING_FILE_VERSION_UTIL_H_
#include <cstdint>
+#include <memory>
#include <string>
+#include <string_view>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/str_cat.h"
#include "icing/file/filesystem.h"
+#include "icing/proto/initialize.pb.h"
namespace icing {
namespace lib {
@@ -32,16 +36,26 @@ namespace version_util {
// - Version 2: M-2023-09, M-2023-11, M-2024-01. Schema is compatible with v1.
// (There were no M-2023-10, M-2023-12).
// - Version 3: M-2024-02. Schema is compatible with v1 and v2.
+// - Version 4: Android V base. Schema is compatible with v1, v2 and v3.
//
+// TODO(b/314816301): Bump kVersion to 4 for Android V rollout with v2 version
+// detection
// LINT.IfChange(kVersion)
-inline static constexpr int32_t kVersion = 3;
+inline static constexpr int32_t kVersion = 4;
// LINT.ThenChange(//depot/google3/icing/schema/schema-store.cc:min_overlay_version_compatibility)
inline static constexpr int32_t kVersionOne = 1;
inline static constexpr int32_t kVersionTwo = 2;
inline static constexpr int32_t kVersionThree = 3;
+inline static constexpr int32_t kVersionFour = 4;
+
+// Version at which v2 version file is introduced.
+inline static constexpr int32_t kFirstV2Version = kVersionFour;
inline static constexpr int kVersionZeroFlashIndexMagic = 0x6dfba6ae;
+inline static constexpr std::string_view kVersionFilenameV1 = "version";
+inline static constexpr std::string_view kVersionFilenameV2 = "version2";
+
struct VersionInfo {
int32_t version;
int32_t max_version;
@@ -67,28 +81,144 @@ enum class StateChange {
kVersionZeroRollForward,
};
-// Helper method to read version info (using version file and flash index header
-// magic) from the existing data. If the state is invalid (e.g. flash index
-// header file is missing), then return an invalid VersionInfo.
+// Contains information about which derived files need to be rebuild.
+//
+// These flags only reflect whether each component should be rebuilt, but do not
+// handle any dependencies. The caller should handle the dependencies by
+// themselves.
+// e.g. - qualified id join index depends on document store derived files, but
+// it's possible to have needs_document_store_derived_files_rebuild =
+// true and needs_qualified_id_join_index_rebuild = false.
+// - The caller should know that join index should also be rebuilt in this
+// case even though needs_qualified_id_join_index_rebuild = false.
+struct DerivedFilesRebuildResult {
+ bool needs_document_store_derived_files_rebuild = false;
+ bool needs_schema_store_derived_files_rebuild = false;
+ bool needs_term_index_rebuild = false;
+ bool needs_integer_index_rebuild = false;
+ bool needs_qualified_id_join_index_rebuild = false;
+
+ DerivedFilesRebuildResult() = default;
+
+ explicit DerivedFilesRebuildResult(
+ bool needs_document_store_derived_files_rebuild_in,
+ bool needs_schema_store_derived_files_rebuild_in,
+ bool needs_term_index_rebuild_in, bool needs_integer_index_rebuild_in,
+ bool needs_qualified_id_join_index_rebuild_in)
+ : needs_document_store_derived_files_rebuild(
+ needs_document_store_derived_files_rebuild_in),
+ needs_schema_store_derived_files_rebuild(
+ needs_schema_store_derived_files_rebuild_in),
+ needs_term_index_rebuild(needs_term_index_rebuild_in),
+ needs_integer_index_rebuild(needs_integer_index_rebuild_in),
+ needs_qualified_id_join_index_rebuild(
+ needs_qualified_id_join_index_rebuild_in) {}
+
+ bool IsRebuildNeeded() const {
+ return needs_document_store_derived_files_rebuild ||
+ needs_schema_store_derived_files_rebuild ||
+ needs_term_index_rebuild || needs_integer_index_rebuild ||
+ needs_qualified_id_join_index_rebuild;
+ }
+
+ bool operator==(const DerivedFilesRebuildResult& other) const {
+ return needs_document_store_derived_files_rebuild ==
+ other.needs_document_store_derived_files_rebuild &&
+ needs_schema_store_derived_files_rebuild ==
+ other.needs_schema_store_derived_files_rebuild &&
+ needs_term_index_rebuild == other.needs_term_index_rebuild &&
+ needs_integer_index_rebuild == other.needs_integer_index_rebuild &&
+ needs_qualified_id_join_index_rebuild ==
+ other.needs_qualified_id_join_index_rebuild;
+ }
+
+ void CombineWithOtherRebuildResultOr(const DerivedFilesRebuildResult& other) {
+ needs_document_store_derived_files_rebuild =
+ needs_document_store_derived_files_rebuild ||
+ other.needs_document_store_derived_files_rebuild;
+ needs_schema_store_derived_files_rebuild =
+ needs_schema_store_derived_files_rebuild ||
+ other.needs_schema_store_derived_files_rebuild;
+ needs_term_index_rebuild =
+ needs_term_index_rebuild || other.needs_term_index_rebuild;
+ needs_integer_index_rebuild =
+ needs_integer_index_rebuild || other.needs_integer_index_rebuild;
+ needs_qualified_id_join_index_rebuild =
+ needs_qualified_id_join_index_rebuild ||
+ other.needs_qualified_id_join_index_rebuild;
+ }
+};
+
+// There are two icing version files:
+// 1. V1 version file contains version and max_version info of the existing
+// data.
+// 2. V2 version file writes the version information using
+// FileBackedProto<IcingSearchEngineVersionProto>. This contains information
+// about the version's enabled trunk stable features in addition to the
+// version numbers written for V1.
+//
+// Both version files must be written to maintain backwards compatibility.
+inline std::string MakeVersionFilePath(std::string_view version_file_dir,
+ std::string_view version_file_name) {
+ return absl_ports::StrCat(version_file_dir, "/", version_file_name);
+}
+
+// Returns a VersionInfo from a given IcingSearchEngineVersionProto.
+inline VersionInfo GetVersionInfoFromProto(
+ const IcingSearchEngineVersionProto& version_proto) {
+ return VersionInfo(version_proto.version(), version_proto.max_version());
+}
+
+
+// Reads the IcingSearchEngineVersionProto from the version files of the
+// existing data.
+//
+// This method reads both the v1 and v2 version files, and returns the v1
+// version numbers in the absence of the v2 version file. If there is a mismatch
+// between the v1 and v2 version numbers, or if the state is invalid (e.g. flash
+// index header file is missing), then an invalid VersionInfo is returned.
//
// RETURNS:
-// - Existing data's VersionInfo on success
+// - Existing data's IcingSearchEngineVersionProto on success
// - INTERNAL_ERROR on I/O errors
-libtextclassifier3::StatusOr<VersionInfo> ReadVersion(
- const Filesystem& filesystem, const std::string& version_file_path,
+libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadVersion(
+ const Filesystem& filesystem, const std::string& version_file_dir,
const std::string& index_base_dir);
-// Helper method to write version file.
+// Writes the v1 version file. V1 version file is written for all versions and
+// contains only Icing's VersionInfo (version number and max_version)
//
// RETURNS:
// - OK on success
// - INTERNAL_ERROR on I/O errors
-libtextclassifier3::Status WriteVersion(const Filesystem& filesystem,
- const std::string& version_file_path,
- const VersionInfo& version_info);
+libtextclassifier3::Status WriteV1Version(const Filesystem& filesystem,
+ const std::string& version_file_dir,
+ const VersionInfo& version_info);
-// Helper method to determine the change state between the existing data version
-// and the current code version.
+// Writes the v2 version file. V2 version file writes the version information
+// using FileBackedProto<IcingSearchEngineVersionProto>.
+//
+// REQUIRES: version_proto.version >= kFirstV2Version. We implement v2 version
+// checking in kFirstV2Version, so callers will always use a version # greater
+// than this.
+//
+// RETURNS:
+// - OK on success
+// - INTERNAL_ERROR on I/O errors
+libtextclassifier3::Status WriteV2Version(
+ const Filesystem& filesystem, const std::string& version_file_dir,
+ std::unique_ptr<IcingSearchEngineVersionProto> version_proto);
+
+// Deletes Icing's version files from version_file_dir.
+//
+// Returns:
+// - OK on success
+// - INTERNAL_ERROR on I/O error
+libtextclassifier3::Status DiscardVersionFiles(
+ const Filesystem& filesystem, std::string_view version_file_dir);
+
+// Determines the change state between the existing data version and the current
+// code version.
//
// REQUIRES: curr_version > 0. We implement version checking in version 1, so
// the callers (except unit tests) will always use a version # greater than 0.
@@ -97,7 +227,19 @@ libtextclassifier3::Status WriteVersion(const Filesystem& filesystem,
StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
int32_t curr_version = kVersion);
-// Helper method to determine whether Icing should rebuild all derived files.
+// Determines the derived files that need to be rebuilt between Icing's existing
+// data based on previous data version and enabled features, and the current
+// code version and enabled features.
+//
+// REQUIRES: curr_version >= kFirstV2Version. We implement v2 version checking
+// in kFirstV2Version, so callers will always use a version # greater than this.
+//
+// RETURNS: DerivedFilesRebuildResult
+DerivedFilesRebuildResult CalculateRequiredDerivedFilesRebuild(
+ const IcingSearchEngineVersionProto& prev_version_proto,
+ const IcingSearchEngineVersionProto& curr_version_proto);
+
+// Determines whether Icing should rebuild all derived files.
// Sometimes it is not required to rebuild derived files when
// roll-forward/upgrading. This function "encodes" upgrade paths and checks if
// the roll-forward/upgrading requires derived files to be rebuilt or not.
@@ -107,6 +249,24 @@ StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
int32_t curr_version = kVersion);
+// Returns the derived files rebuilds required for a given feature.
+DerivedFilesRebuildResult GetFeatureDerivedFilesRebuildResult(
+ IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature);
+
+// Constructs the IcingSearchEngineFeatureInfoProto for a given feature.
+IcingSearchEngineFeatureInfoProto GetFeatureInfoProto(
+ IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature);
+
+// Populates the enabled_features field for an IcingSearchEngineFeatureInfoProto
+// based on icing's initialization flag options.
+//
+// All enabled features are converted into an IcingSearchEngineFeatureInfoProto
+// and returned in IcingSearchEngineVersionProto::enabled_features. A conversion
+// should be added for each trunk stable feature flag defined in
+// IcingSearchEngineOptions.
+void AddEnabledFeatures(const IcingSearchEngineOptions& options,
+ IcingSearchEngineVersionProto* version_proto);
+
} // namespace version_util
} // namespace lib
diff --git a/icing/file/version-util_test.cc b/icing/file/version-util_test.cc
index 9dedb1d..f2922e2 100644
--- a/icing/file/version-util_test.cc
+++ b/icing/file/version-util_test.cc
@@ -14,14 +14,19 @@
#include "icing/file/version-util.h"
+#include <cstdint>
+#include <memory>
#include <optional>
#include <string>
+#include <unordered_set>
#include <utility>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/file/filesystem.h"
#include "icing/file/posting_list/flash-index-storage-header.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/initialize.pb.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
@@ -32,19 +37,38 @@ namespace version_util {
namespace {
using ::testing::Eq;
+using ::testing::IsEmpty;
using ::testing::IsFalse;
using ::testing::IsTrue;
+IcingSearchEngineVersionProto MakeTestVersionProto(
+ const VersionInfo& version_info,
+ const std::unordered_set<
+ IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>& features_set) {
+ IcingSearchEngineVersionProto version_proto;
+ version_proto.set_version(version_info.version);
+ version_proto.set_max_version(version_info.max_version);
+
+ auto* enabled_features = version_proto.mutable_enabled_features();
+ for (const auto& feature : features_set) {
+ enabled_features->Add(GetFeatureInfoProto(feature));
+ }
+ return version_proto;
+}
+
struct VersionUtilReadVersionTestParam {
- std::optional<VersionInfo> existing_version_info;
+ std::optional<VersionInfo> existing_v1_version_info;
+ std::optional<VersionInfo> existing_v2_version_info;
std::optional<int> existing_flash_index_magic;
VersionInfo expected_version_info;
explicit VersionUtilReadVersionTestParam(
- std::optional<VersionInfo> existing_version_info_in,
+ std::optional<VersionInfo> existing_v1_version_info_in,
+ std::optional<VersionInfo> existing_v2_version_info_in,
std::optional<int> existing_flash_index_magic_in,
VersionInfo expected_version_info_in)
- : existing_version_info(std::move(existing_version_info_in)),
+ : existing_v1_version_info(std::move(existing_v1_version_info_in)),
+ existing_v2_version_info(std::move(existing_v2_version_info_in)),
existing_flash_index_magic(std::move(existing_flash_index_magic_in)),
expected_version_info(std::move(expected_version_info_in)) {}
};
@@ -54,7 +78,6 @@ class VersionUtilReadVersionTest
protected:
void SetUp() override {
base_dir_ = GetTestTempDir() + "/version_util_test";
- version_file_path_ = base_dir_ + "/version";
index_path_ = base_dir_ + "/index";
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()));
@@ -68,19 +91,28 @@ class VersionUtilReadVersionTest
Filesystem filesystem_;
std::string base_dir_;
- std::string version_file_path_;
std::string index_path_;
};
TEST_P(VersionUtilReadVersionTest, ReadVersion) {
const VersionUtilReadVersionTestParam& param = GetParam();
+ IcingSearchEngineVersionProto dummy_version_proto;
- // Prepare version file and flash index file.
- if (param.existing_version_info.has_value()) {
- ICING_ASSERT_OK(WriteVersion(filesystem_, version_file_path_,
- param.existing_version_info.value()));
+ if (param.existing_v1_version_info.has_value()) {
+ ICING_ASSERT_OK(WriteV1Version(filesystem_, base_dir_,
+ param.existing_v1_version_info.value()));
+ }
+ if (param.existing_v2_version_info.has_value()) {
+ dummy_version_proto = MakeTestVersionProto(
+ param.existing_v2_version_info.value(),
+ {IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR,
+ IcingSearchEngineFeatureInfoProto::UNKNOWN});
+ ICING_ASSERT_OK(WriteV2Version(
+ filesystem_, base_dir_,
+ std::make_unique<IcingSearchEngineVersionProto>(dummy_version_proto)));
}
+ // Prepare flash index file.
if (param.existing_flash_index_magic.has_value()) {
HeaderBlock header_block(&filesystem_, /*block_size=*/4096);
header_block.header()->magic = param.existing_flash_index_magic.value();
@@ -94,10 +126,22 @@ TEST_P(VersionUtilReadVersionTest, ReadVersion) {
ASSERT_TRUE(header_block.Write(sfd.get()));
}
- ICING_ASSERT_OK_AND_ASSIGN(
- VersionInfo version_info,
- ReadVersion(filesystem_, version_file_path_, index_path_));
- EXPECT_THAT(version_info, Eq(param.expected_version_info));
+ ICING_ASSERT_OK_AND_ASSIGN(IcingSearchEngineVersionProto version_proto,
+ ReadVersion(filesystem_, base_dir_, index_path_));
+ if (param.existing_v2_version_info.has_value() &&
+ param.expected_version_info.version ==
+ param.existing_v2_version_info.value().version) {
+ EXPECT_THAT(version_proto,
+ portable_equals_proto::EqualsProto(dummy_version_proto));
+ } else {
+ // We're returning the version from v1 version file, or an invalid version.
+ // version_proto.enabled_features should be empty in this case.
+ EXPECT_THAT(version_proto.version(),
+ Eq(param.expected_version_info.version));
+ EXPECT_THAT(version_proto.max_version(),
+ Eq(param.expected_version_info.max_version));
+ EXPECT_THAT(version_proto.enabled_features(), IsEmpty());
+ }
}
INSTANTIATE_TEST_SUITE_P(
@@ -107,7 +151,8 @@ INSTANTIATE_TEST_SUITE_P(
// - Flash index doesn't exist
// - Result: version -1, max_version -1 (invalid)
VersionUtilReadVersionTestParam(
- /*existing_version_info_in=*/std::nullopt,
+ /*existing_v1_version_info_in=*/std::nullopt,
+ /*existing_v2_version_info_in=*/std::nullopt,
/*existing_flash_index_magic_in=*/std::nullopt,
/*expected_version_info_in=*/
VersionInfo(/*version_in=*/-1, /*max_version=*/-1)),
@@ -116,7 +161,8 @@ INSTANTIATE_TEST_SUITE_P(
// - Flash index exists with version 0 magic
// - Result: version 0, max_version 0
VersionUtilReadVersionTestParam(
- /*existing_version_info_in=*/std::nullopt,
+ /*existing_v1_version_info_in=*/std::nullopt,
+ /*existing_v2_version_info_in=*/std::nullopt,
/*existing_flash_index_magic_in=*/
std::make_optional<int>(kVersionZeroFlashIndexMagic),
/*expected_version_info_in=*/
@@ -126,65 +172,157 @@ INSTANTIATE_TEST_SUITE_P(
// - Flash index exists with non version 0 magic
// - Result: version -1, max_version -1 (invalid)
VersionUtilReadVersionTestParam(
- /*existing_version_info_in=*/std::nullopt,
+ /*existing_v1_version_info_in=*/std::nullopt,
+ /*existing_v2_version_info_in=*/std::nullopt,
/*existing_flash_index_magic_in=*/
std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
/*expected_version_info_in=*/
VersionInfo(/*version_in=*/-1, /*max_version=*/-1)),
- // - Version file exists
+ // - Version file v1 exists
// - Flash index doesn't exist
// - Result: version -1, max_version 1 (invalid)
VersionUtilReadVersionTestParam(
- /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>(
/*version_in=*/1, /*max_version=*/1),
+ /*existing_v2_version_info_in=*/std::nullopt,
/*existing_flash_index_magic_in=*/std::nullopt,
/*expected_version_info_in=*/
VersionInfo(/*version_in=*/-1, /*max_version=*/1)),
- // - Version file exists: version 1, max_version 1
+ // - Version file v1 exists: version 1, max_version 1
// - Flash index exists with version 0 magic
// - Result: version 0, max_version 1
VersionUtilReadVersionTestParam(
- /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>(
/*version_in=*/1, /*max_version=*/1),
+ /*existing_v2_version_info_in=*/std::nullopt,
/*existing_flash_index_magic_in=*/
std::make_optional<int>(kVersionZeroFlashIndexMagic),
/*expected_version_info_in=*/
VersionInfo(/*version_in=*/0, /*max_version=*/1)),
- // - Version file exists: version 2, max_version 3
+ // - Version file v1 exists: version 2, max_version 3
// - Flash index exists with version 0 magic
// - Result: version 0, max_version 3
VersionUtilReadVersionTestParam(
- /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>(
/*version_in=*/2, /*max_version=*/3),
+ /*existing_v2_version_info_in=*/std::nullopt,
/*existing_flash_index_magic_in=*/
std::make_optional<int>(kVersionZeroFlashIndexMagic),
/*expected_version_info_in=*/
VersionInfo(/*version_in=*/0, /*max_version=*/3)),
- // - Version file exists: version 1, max_version 1
+ // - Version file v1 exists: version 1, max_version 1
// - Flash index exists with non version 0 magic
// - Result: version 1, max_version 1
VersionUtilReadVersionTestParam(
- /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>(
/*version_in=*/1, /*max_version=*/1),
+ /*existing_v2_version_info_in=*/std::nullopt,
/*existing_flash_index_magic_in=*/
std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
/*expected_version_info_in=*/
VersionInfo(/*version_in=*/1, /*max_version=*/1)),
- // - Version file exists: version 2, max_version 3
+ // - Version file v1 exists: version 2, max_version 3
// - Flash index exists with non version 0 magic
// - Result: version 2, max_version 3
VersionUtilReadVersionTestParam(
- /*existing_version_info_in=*/std::make_optional<VersionInfo>(
+ /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>(
/*version_in=*/2, /*max_version=*/3),
+ /*existing_v2_version_info_in=*/std::nullopt,
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/2, /*max_version=*/3)),
+
+ // - Version file v1 exists: version 2, max_version 4
+ // - Version file v2 exists: version 4, max_version 4
+ // - Flash index exists with non version 0 magic
+ // - Result: version -1, max_version 4
+ VersionUtilReadVersionTestParam(
+ /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/2, /*max_version=*/4),
+ /*existing_v2_version_info_in=*/
+ std::make_optional<VersionInfo>(
+ /*version_in=*/4, /*max_version=*/4),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/-1, /*max_version=*/4)),
+
+ // - Version file v1 exists: version 4, max_version 4
+ // - Version file v2 exists: version 4, max_version 4
+ // - Flash index exists with version 0 magic
+ // - Result: version -1, max_version 4
+ VersionUtilReadVersionTestParam(
+ /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/4, /*max_version=*/4),
+ /*existing_v2_version_info_in=*/
+ std::make_optional<VersionInfo>(
+ /*version_in=*/4, /*max_version=*/4),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/-1, /*max_version=*/4)),
+
+ // - Version file v1 exists: version 4, max_version 4
+ // - Version file v2 exists: version 4, max_version 4
+ // - Flash index exists with non version 0 magic
+ // - Result: version 4, max_version 4
+ VersionUtilReadVersionTestParam(
+ /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/4, /*max_version=*/4),
+ /*existing_v2_version_info_in=*/
+ std::make_optional<VersionInfo>(
+ /*version_in=*/4, /*max_version=*/4),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/4, /*max_version=*/4)),
+
+ // - Version file v1 exists: version 4, max_version 4
+ // - Version file v2 does not exist
+ // - Flash index exists with non version 0 magic
+ // - Result: version -1, max_version 4
+ VersionUtilReadVersionTestParam(
+ /*existing_v1_version_info_in=*/std::make_optional<VersionInfo>(
+ /*version_in=*/4, /*max_version=*/4),
+ /*existing_v2_version_info_in=*/std::nullopt,
/*existing_flash_index_magic_in=*/
std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
/*expected_version_info_in=*/
- VersionInfo(/*version_in=*/2, /*max_version=*/3))));
+ VersionInfo(/*version_in=*/-1, /*max_version=*/4)),
+
+ // - Version file v1 does not exist
+ // - Version file v2 exists: version 4, max_version 4
+ // - Flash index exists with non version 0 magic
+ // - Result: version -1, max_version -1
+ VersionUtilReadVersionTestParam(
+ /*existing_v1_version_info_in=*/std::nullopt,
+ /*existing_v2_version_info_in=*/
+ std::make_optional<VersionInfo>(
+ /*version_in=*/4, /*max_version=*/4),
+ /*existing_flash_index_magic_in=*/
+ std::make_optional<int>(kVersionZeroFlashIndexMagic + 1),
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/-1, /*max_version=*/-1)),
+
+ // - Version file v1 doesn't exist
+ // - Version file v2 exists: version 4, max_version 4
+ // - Flash index doesn't exist
+ // - Result: version -1, max_version -1 (invalid since flash index
+ // doesn't exist)
+ VersionUtilReadVersionTestParam(
+ /*existing_v1_version_info_in=*/std::nullopt,
+ /*existing_v2_version_info_in=*/
+ std::make_optional<VersionInfo>(
+ /*version_in=*/4, /*max_version=*/4),
+ /*existing_flash_index_magic_in=*/std::nullopt,
+ /*expected_version_info_in=*/
+ VersionInfo(/*version_in=*/-1, /*max_version=*/-1))));
struct VersionUtilStateChangeTestParam {
VersionInfo existing_version_info;
@@ -389,6 +527,240 @@ INSTANTIATE_TEST_SUITE_P(
/*curr_version_in=*/2,
/*expected_state_change_in=*/StateChange::kRollBack)));
+struct VersionUtilDerivedFilesRebuildTestParam {
+ int32_t existing_version;
+ int32_t max_version;
+ std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
+ existing_enabled_features;
+ int32_t curr_version;
+ std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
+ curr_enabled_features;
+ DerivedFilesRebuildResult expected_derived_files_rebuild_result;
+
+ explicit VersionUtilDerivedFilesRebuildTestParam(
+ int32_t existing_version_in, int32_t max_version_in,
+ std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
+ existing_enabled_features_in,
+ int32_t curr_version_in,
+ std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
+ curr_enabled_features_in,
+ DerivedFilesRebuildResult expected_derived_files_rebuild_result_in)
+ : existing_version(existing_version_in),
+ max_version(max_version_in),
+ existing_enabled_features(std::move(existing_enabled_features_in)),
+ curr_version(curr_version_in),
+ curr_enabled_features(std::move(curr_enabled_features_in)),
+ expected_derived_files_rebuild_result(
+ std::move(expected_derived_files_rebuild_result_in)) {}
+};
+
+class VersionUtilDerivedFilesRebuildTest
+ : public ::testing::TestWithParam<VersionUtilDerivedFilesRebuildTestParam> {
+};
+
+TEST_P(VersionUtilDerivedFilesRebuildTest,
+ CalculateRequiredDerivedFilesRebuild) {
+ const VersionUtilDerivedFilesRebuildTestParam& param = GetParam();
+
+ EXPECT_THAT(CalculateRequiredDerivedFilesRebuild(
+ /*prev_version_proto=*/MakeTestVersionProto(
+ VersionInfo(param.existing_version, param.max_version),
+ param.existing_enabled_features),
+ /*curr_version_proto=*/
+ MakeTestVersionProto(
+ VersionInfo(param.curr_version, param.max_version),
+ param.curr_enabled_features)),
+ Eq(param.expected_derived_files_rebuild_result));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ VersionUtilDerivedFilesRebuildTest, VersionUtilDerivedFilesRebuildTest,
+ testing::Values(
+ // - Existing version -1, max_version -1 (invalid)
+ // - Existing enabled features = {}
+ // - Current version = 4
+ // - Current enabled features = {}
+ //
+ // - Result: rebuild everything
+ VersionUtilDerivedFilesRebuildTestParam(
+ /*existing_version_in=*/-1, /*max_version_in=*/-1,
+ /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4,
+ /*curr_enabled_features_in=*/{},
+ /*expected_derived_files_rebuild_result_in=*/
+ DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/true,
+ /*needs_schema_store_derived_files_rebuild=*/true,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/true,
+ /*needs_qualified_id_join_index_rebuild=*/true)),
+
+ // - Existing version -1, max_version 2 (invalid)
+ // - Existing enabled features = {}
+ // - Current version = 4
+ // - Current enabled features = {}
+ //
+ // - Result: rebuild everything
+ VersionUtilDerivedFilesRebuildTestParam(
+ /*existing_version_in=*/-1, /*max_version_in=*/-1,
+ /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4,
+ /*curr_enabled_features_in=*/{},
+ /*expected_derived_files_rebuild_result_in=*/
+ DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/true,
+ /*needs_schema_store_derived_files_rebuild=*/true,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/true,
+ /*needs_qualified_id_join_index_rebuild=*/true)),
+
+ // - Existing version 3, max_version 3 (pre v2 version check)
+ // - Existing enabled features = {}
+ // - Current version = 4
+ // - Current enabled features = {}
+ //
+ // - Result: don't rebuild anything
+ VersionUtilDerivedFilesRebuildTestParam(
+ /*existing_version_in=*/3, /*max_version_in=*/3,
+ /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4,
+ /*curr_enabled_features_in=*/{},
+ /*expected_derived_files_rebuild_result_in=*/
+ DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/false,
+ /*needs_schema_store_derived_files_rebuild=*/false,
+ /*needs_term_index_rebuild=*/false,
+ /*needs_integer_index_rebuild=*/false,
+ /*needs_qualified_id_join_index_rebuild=*/false)),
+
+ // - Existing version 3, max_version 3 (pre v2 version check)
+ // - Existing enabled features = {}
+ // - Current version = 4
+ // - Current enabled features = {FEATURE_HAS_PROPERTY_OPERATOR}
+ //
+ // - Result: rebuild term index
+ VersionUtilDerivedFilesRebuildTestParam(
+ /*existing_version_in=*/3, /*max_version_in=*/3,
+ /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4,
+ /*curr_enabled_features_in=*/
+ {IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR},
+ /*expected_derived_files_rebuild_result_in=*/
+ DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/false,
+ /*needs_schema_store_derived_files_rebuild=*/false,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/false,
+ /*needs_qualified_id_join_index_rebuild=*/false)),
+
+ // - Existing version 4, max_version 4
+ // - Existing enabled features = {}
+ // - Current version = 4
+ // - Current enabled features = {}
+ //
+ // - Result: don't rebuild anything
+ VersionUtilDerivedFilesRebuildTestParam(
+ /*existing_version_in=*/4, /*max_version_in=*/4,
+ /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4,
+ /*curr_enabled_features_in=*/{},
+ /*expected_derived_files_rebuild_result_in=*/
+ DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/false,
+ /*needs_schema_store_derived_files_rebuild=*/false,
+ /*needs_term_index_rebuild=*/false,
+ /*needs_integer_index_rebuild=*/false,
+ /*needs_qualified_id_join_index_rebuild=*/false)),
+
+ // - Existing version 4, max_version 5
+ // - Existing enabled features = {}
+ // - Current version = 5
+ // - Current enabled features = {}
+ //
+ // - Result: Rollforward -- rebuild everything
+ VersionUtilDerivedFilesRebuildTestParam(
+ /*existing_version_in=*/4, /*max_version_in=*/5,
+ /*existing_enabled_features_in=*/{}, /*curr_version_in=*/5,
+ /*curr_enabled_features_in=*/{},
+ /*expected_derived_files_rebuild_result_in=*/
+ DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/true,
+ /*needs_schema_store_derived_files_rebuild=*/true,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/true,
+ /*needs_qualified_id_join_index_rebuild=*/true)),
+
+ // - Existing version 5, max_version 5
+ // - Existing enabled features = {}
+ // - Current version = 4
+ // - Current enabled features = {}
+ //
+ // - Result: Rollback -- rebuild everything
+ VersionUtilDerivedFilesRebuildTestParam(
+ /*existing_version_in=*/5, /*max_version_in=*/5,
+ /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4,
+ /*curr_enabled_features_in=*/{},
+ /*expected_derived_files_rebuild_result_in=*/
+ DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/true,
+ /*needs_schema_store_derived_files_rebuild=*/true,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/true,
+ /*needs_qualified_id_join_index_rebuild=*/true)),
+
+ // - Existing version 4, max_version 4
+ // - Existing enabled features = {}
+ // - Current version = 4
+ // - Current enabled features = {FEATURE_HAS_PROPERTY_OPERATOR}
+ //
+ // - Result: rebuild term index
+ VersionUtilDerivedFilesRebuildTestParam(
+ /*existing_version_in=*/4, /*max_version_in=*/4,
+ /*existing_enabled_features_in=*/{}, /*curr_version_in=*/4,
+ /*curr_enabled_features_in=*/
+ {IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR},
+ /*expected_derived_files_rebuild_result_in=*/
+ DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/false,
+ /*needs_schema_store_derived_files_rebuild=*/false,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/false,
+ /*needs_qualified_id_join_index_rebuild=*/false)),
+
+ // - Existing version 4, max_version 4
+ // - Existing enabled features = {FEATURE_HAS_PROPERTY_OPERATOR}
+ // - Current version = 4
+ // - Current enabled features = {}
+ //
+ // - Result: rebuild term index
+ VersionUtilDerivedFilesRebuildTestParam(
+ /*existing_version_in=*/4, /*max_version_in=*/4,
+ /*existing_enabled_features_in=*/
+ {IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR},
+ /*curr_version_in=*/4, /*curr_enabled_features_in=*/{},
+ /*expected_derived_files_rebuild_result_in=*/
+ DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/false,
+ /*needs_schema_store_derived_files_rebuild=*/false,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/false,
+ /*needs_qualified_id_join_index_rebuild=*/false)),
+
+ // - Existing version 4, max_version 4
+ // - Existing enabled features = {UNKNOWN}
+ // - Current version = 4
+ // - Current enabled features = {FEATURE_HAS_PROPERTY_OPERATOR}
+ //
+ // - Result: rebuild everything
+ VersionUtilDerivedFilesRebuildTestParam(
+ /*existing_version_in=*/4, /*max_version_in=*/4,
+ /*existing_enabled_features_in=*/
+ {IcingSearchEngineFeatureInfoProto::UNKNOWN}, /*curr_version_in=*/4,
+ /*curr_enabled_features_in=*/
+ {IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR},
+ /*expected_derived_files_rebuild_result_in=*/
+ DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/true,
+ /*needs_schema_store_derived_files_rebuild=*/true,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/true,
+ /*needs_qualified_id_join_index_rebuild=*/true))));
+
TEST(VersionUtilTest, ShouldRebuildDerivedFilesUndeterminedVersion) {
EXPECT_THAT(
ShouldRebuildDerivedFiles(VersionInfo(-1, -1), /*curr_version=*/1),
@@ -475,8 +847,80 @@ TEST(VersionUtilTest, Upgrade) {
EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionOne, kVersionOne),
/*curr_version=*/kVersionThree),
IsFalse());
+
+ // kVersionThree -> kVersionFour.
+ EXPECT_THAT(
+ ShouldRebuildDerivedFiles(VersionInfo(kVersionThree, kVersionThree),
+ /*curr_version=*/kVersionFour),
+ IsFalse());
+
+ // kVersionTwo -> kVersionFour
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionTwo, kVersionTwo),
+ /*curr_version=*/kVersionFour),
+ IsFalse());
+
+ // kVersionOne -> kVersionFour.
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionOne, kVersionOne),
+ /*curr_version=*/kVersionFour),
+ IsFalse());
+}
+
+TEST(VersionUtilTest, GetFeatureDerivedFilesRebuildResult_unknown) {
+ EXPECT_THAT(GetFeatureDerivedFilesRebuildResult(
+ IcingSearchEngineFeatureInfoProto::UNKNOWN),
+ Eq(DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/true,
+ /*needs_schema_store_derived_files_rebuild=*/true,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/true,
+ /*needs_qualified_id_join_index_rebuild=*/true)));
}
+TEST(VersionUtilTest,
+ GetFeatureDerivedFilesRebuildResult_featureHasPropertyOperator) {
+ EXPECT_THAT(
+ GetFeatureDerivedFilesRebuildResult(
+ IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR),
+ Eq(DerivedFilesRebuildResult(
+ /*needs_document_store_derived_files_rebuild=*/false,
+ /*needs_schema_store_derived_files_rebuild=*/false,
+ /*needs_term_index_rebuild=*/true,
+ /*needs_integer_index_rebuild=*/false,
+ /*needs_qualified_id_join_index_rebuild=*/false)));
+}
+
+class VersionUtilFeatureProtoTest
+ : public ::testing::TestWithParam<
+ IcingSearchEngineFeatureInfoProto::FlaggedFeatureType> {};
+
+TEST_P(VersionUtilFeatureProtoTest, GetFeatureInfoProto) {
+ IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature_type =
+ GetParam();
+ DerivedFilesRebuildResult rebuild_result =
+ GetFeatureDerivedFilesRebuildResult(feature_type);
+
+ IcingSearchEngineFeatureInfoProto feature_info =
+ GetFeatureInfoProto(feature_type);
+ EXPECT_THAT(feature_info.feature_type(), Eq(feature_type));
+
+ EXPECT_THAT(feature_info.needs_document_store_rebuild(),
+ Eq(rebuild_result.needs_document_store_derived_files_rebuild));
+ EXPECT_THAT(feature_info.needs_schema_store_rebuild(),
+ Eq(rebuild_result.needs_schema_store_derived_files_rebuild));
+ EXPECT_THAT(feature_info.needs_term_index_rebuild(),
+ Eq(rebuild_result.needs_term_index_rebuild));
+ EXPECT_THAT(feature_info.needs_integer_index_rebuild(),
+ Eq(rebuild_result.needs_integer_index_rebuild));
+ EXPECT_THAT(feature_info.needs_qualified_id_join_index_rebuild(),
+ Eq(rebuild_result.needs_qualified_id_join_index_rebuild));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ VersionUtilFeatureProtoTest, VersionUtilFeatureProtoTest,
+ testing::Values(
+ IcingSearchEngineFeatureInfoProto::UNKNOWN,
+ IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR));
+
} // namespace
} // namespace version_util
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 72be4e9..72e744b 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -14,7 +14,10 @@
#include "icing/icing-search-engine.h"
+#include <algorithm>
+#include <cstddef>
#include <cstdint>
+#include <functional>
#include <memory>
#include <string>
#include <string_view>
@@ -41,12 +44,16 @@
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/integer-index.h"
#include "icing/index/term-indexing-handler.h"
+#include "icing/index/term-metadata.h"
+#include "icing/jni/jni-cache.h"
+#include "icing/join/join-children-fetcher.h"
#include "icing/join/join-processor.h"
#include "icing/join/qualified-id-join-index-impl-v1.h"
#include "icing/join/qualified-id-join-index-impl-v2.h"
#include "icing/join/qualified-id-join-index.h"
#include "icing/join/qualified-id-join-indexing-handler.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/performance-configuration.h"
#include "icing/portable/endian.h"
#include "icing/proto/debug.pb.h"
#include "icing/proto/document.pb.h"
@@ -73,9 +80,8 @@
#include "icing/result/projector.h"
#include "icing/result/result-adjustment-info.h"
#include "icing/result/result-retriever-v2.h"
+#include "icing/result/result-state-manager.h"
#include "icing/schema/schema-store.h"
-#include "icing/schema/schema-util.h"
-#include "icing/schema/section.h"
#include "icing/scoring/advanced_scoring/score-expression.h"
#include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
#include "icing/scoring/scored-document-hit.h"
@@ -84,11 +90,8 @@
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
#include "icing/tokenization/language-segmenter-factory.h"
-#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer-factory.h"
-#include "icing/transform/normalizer.h"
#include "icing/util/clock.h"
-#include "icing/util/crc32.h"
#include "icing/util/data-loss.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -100,7 +103,6 @@ namespace lib {
namespace {
-constexpr std::string_view kVersionFilename = "version";
constexpr std::string_view kDocumentSubfolderName = "document_dir";
constexpr std::string_view kIndexSubfolderName = "index_dir";
constexpr std::string_view kIntegerIndexSubfolderName = "integer_index_dir";
@@ -253,12 +255,6 @@ CreateQualifiedIdJoinIndex(const Filesystem& filesystem,
}
}
-// Version file is a single file under base_dir containing version info of the
-// existing data.
-std::string MakeVersionFilePath(const std::string& base_dir) {
- return absl_ports::StrCat(base_dir, "/", kVersionFilename);
-}
-
// Document store files are in a standalone subfolder for easier file
// management. We can delete and recreate the subfolder and not touch/affect
// anything else.
@@ -604,32 +600,49 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
return status;
}
- // Read version file and determine the state change.
- const std::string version_filepath = MakeVersionFilePath(options_.base_dir());
+ // Do version and flags compatibility check
+ // Read version file, determine the state change and rebuild derived files if
+ // needed.
const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
ICING_ASSIGN_OR_RETURN(
- version_util::VersionInfo version_info,
- version_util::ReadVersion(*filesystem_, version_filepath, index_dir));
+ IcingSearchEngineVersionProto stored_version_proto,
+ version_util::ReadVersion(
+ *filesystem_, /*version_file_dir=*/options_.base_dir(), index_dir));
+ version_util::VersionInfo stored_version_info =
+ version_util::GetVersionInfoFromProto(stored_version_proto);
version_util::StateChange version_state_change =
- version_util::GetVersionStateChange(version_info);
+ version_util::GetVersionStateChange(stored_version_info);
+
+ // Construct icing's current version proto based on the current code version
+ IcingSearchEngineVersionProto current_version_proto;
+ current_version_proto.set_version(version_util::kVersion);
+ current_version_proto.set_max_version(
+ std::max(stored_version_info.max_version, version_util::kVersion));
+ version_util::AddEnabledFeatures(options_, &current_version_proto);
+
+ // Step 1: If versions are incompatible, migrate schema according to the
+ // version state change.
if (version_state_change != version_util::StateChange::kCompatible) {
- // Step 1: migrate schema according to the version state change.
ICING_RETURN_IF_ERROR(SchemaStore::MigrateSchema(
filesystem_.get(), MakeSchemaDirectoryPath(options_.base_dir()),
version_state_change, version_util::kVersion));
+ }
- // Step 2: discard all derived data if needed rebuild.
- if (version_util::ShouldRebuildDerivedFiles(version_info)) {
- ICING_RETURN_IF_ERROR(DiscardDerivedFiles());
- }
+ // Step 2: Discard derived files that need to be rebuilt
+ version_util::DerivedFilesRebuildResult required_derived_files_rebuild =
+ version_util::CalculateRequiredDerivedFilesRebuild(stored_version_proto,
+ current_version_proto);
+ ICING_RETURN_IF_ERROR(DiscardDerivedFiles(required_derived_files_rebuild));
- // Step 3: update version file
- version_util::VersionInfo new_version_info(
- version_util::kVersion,
- std::max(version_info.max_version, version_util::kVersion));
- ICING_RETURN_IF_ERROR(version_util::WriteVersion(
- *filesystem_, version_filepath, new_version_info));
- }
+ // Step 3: update version files. We need to update both the V1 and V2
+ // version files.
+ ICING_RETURN_IF_ERROR(version_util::WriteV1Version(
+ *filesystem_, /*version_file_dir=*/options_.base_dir(),
+ version_util::GetVersionInfoFromProto(current_version_proto)));
+ ICING_RETURN_IF_ERROR(version_util::WriteV2Version(
+ *filesystem_, /*version_file_dir=*/options_.base_dir(),
+ std::make_unique<IcingSearchEngineVersionProto>(
+ std::move(current_version_proto))));
ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats));
@@ -688,10 +701,9 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
// We're going to need to build the index from scratch. So just delete its
// directory now.
// Discard index directory and instantiate a new one.
- Index::Options index_options(
- index_dir, options_.index_merge_size(),
- options_.lite_index_sort_at_indexing(), options_.lite_index_sort_size(),
- options_.build_property_existence_metadata_hits());
+ Index::Options index_options(index_dir, options_.index_merge_size(),
+ options_.lite_index_sort_at_indexing(),
+ options_.lite_index_sort_size());
if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
!filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
return absl_ports::InternalError(
@@ -776,6 +788,31 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
return index_init_status;
}
+
+ // Set recovery cause to FEATURE_FLAG_CHANGED according to the calculated
+ // required_derived_files_rebuild
+ if (required_derived_files_rebuild
+ .needs_document_store_derived_files_rebuild) {
+ initialize_stats->set_document_store_recovery_cause(
+ InitializeStatsProto::FEATURE_FLAG_CHANGED);
+ }
+ if (required_derived_files_rebuild
+ .needs_schema_store_derived_files_rebuild) {
+ initialize_stats->set_schema_store_recovery_cause(
+ InitializeStatsProto::FEATURE_FLAG_CHANGED);
+ }
+ if (required_derived_files_rebuild.needs_term_index_rebuild) {
+ initialize_stats->set_index_restoration_cause(
+ InitializeStatsProto::FEATURE_FLAG_CHANGED);
+ }
+ if (required_derived_files_rebuild.needs_integer_index_rebuild) {
+ initialize_stats->set_integer_index_restoration_cause(
+ InitializeStatsProto::FEATURE_FLAG_CHANGED);
+ }
+ if (required_derived_files_rebuild.needs_qualified_id_join_index_rebuild) {
+ initialize_stats->set_qualified_id_join_index_restoration_cause(
+ InitializeStatsProto::FEATURE_FLAG_CHANGED);
+ }
}
if (status.ok()) {
@@ -842,10 +879,9 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
return absl_ports::InternalError(
absl_ports::StrCat("Could not create directory: ", index_dir));
}
- Index::Options index_options(
- index_dir, options_.index_merge_size(),
- options_.lite_index_sort_at_indexing(), options_.lite_index_sort_size(),
- options_.build_property_existence_metadata_hits());
+ Index::Options index_options(index_dir, options_.index_merge_size(),
+ options_.lite_index_sort_at_indexing(),
+ options_.lite_index_sort_size());
// Term index
InitializeStatsProto::RecoveryCause index_recovery_cause;
@@ -1480,7 +1516,8 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery(
// Gets unordered results from query processor
auto query_processor_or = QueryProcessor::Create(
index_.get(), integer_index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(), schema_store_.get());
+ normalizer_.get(), document_store_.get(), schema_store_.get(),
+ clock_.get());
if (!query_processor_or.ok()) {
TransformStatus(query_processor_or.status(), result_status);
delete_stats->set_parse_query_latency_ms(
@@ -1979,6 +2016,7 @@ SearchResultProto IcingSearchEngine::InternalSearch(
result_status->set_message("IcingSearchEngine has not been initialized!");
return result_proto;
}
+ index_->PublishQueryStats(query_stats);
libtextclassifier3::Status status =
ValidateResultSpec(document_store_.get(), result_spec);
@@ -2183,7 +2221,8 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
// Gets unordered results from query processor
auto query_processor_or = QueryProcessor::Create(
index_.get(), integer_index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(), schema_store_.get());
+ normalizer_.get(), document_store_.get(), schema_store_.get(),
+ clock_.get());
if (!query_processor_or.ok()) {
search_stats->set_parse_query_latency_ms(
component_timer->GetElapsedMilliseconds());
@@ -2198,7 +2237,8 @@ IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
libtextclassifier3::StatusOr<QueryResults> query_results_or;
if (ranking_strategy_or.ok()) {
query_results_or = query_processor->ParseSearch(
- search_spec, ranking_strategy_or.ValueOrDie(), current_time_ms);
+ search_spec, ranking_strategy_or.ValueOrDie(), current_time_ms,
+ search_stats);
} else {
query_results_or = ranking_strategy_or.status();
}
@@ -2702,7 +2742,12 @@ IcingSearchEngine::TruncateIndicesTo(DocumentId last_stored_document_id) {
qualified_id_join_index_needed_restoration);
}
-libtextclassifier3::Status IcingSearchEngine::DiscardDerivedFiles() {
+libtextclassifier3::Status IcingSearchEngine::DiscardDerivedFiles(
+ const version_util::DerivedFilesRebuildResult& rebuild_result) {
+ if (!rebuild_result.IsRebuildNeeded()) {
+ return libtextclassifier3::Status::OK;
+ }
+
if (schema_store_ != nullptr || document_store_ != nullptr ||
index_ != nullptr || integer_index_ != nullptr ||
qualified_id_join_index_ != nullptr) {
@@ -2711,30 +2756,40 @@ libtextclassifier3::Status IcingSearchEngine::DiscardDerivedFiles() {
}
// Schema store
- ICING_RETURN_IF_ERROR(
- SchemaStore::DiscardDerivedFiles(filesystem_.get(), options_.base_dir()));
+ if (rebuild_result.needs_schema_store_derived_files_rebuild) {
+ ICING_RETURN_IF_ERROR(SchemaStore::DiscardDerivedFiles(
+ filesystem_.get(), options_.base_dir()));
+ }
// Document store
- ICING_RETURN_IF_ERROR(DocumentStore::DiscardDerivedFiles(
- filesystem_.get(), options_.base_dir()));
+ if (rebuild_result.needs_document_store_derived_files_rebuild) {
+ ICING_RETURN_IF_ERROR(DocumentStore::DiscardDerivedFiles(
+ filesystem_.get(), options_.base_dir()));
+ }
// Term index
- if (!filesystem_->DeleteDirectoryRecursively(
- MakeIndexDirectoryPath(options_.base_dir()).c_str())) {
- return absl_ports::InternalError("Failed to discard index");
+ if (rebuild_result.needs_term_index_rebuild) {
+ if (!filesystem_->DeleteDirectoryRecursively(
+ MakeIndexDirectoryPath(options_.base_dir()).c_str())) {
+ return absl_ports::InternalError("Failed to discard index");
+ }
}
// Integer index
- if (!filesystem_->DeleteDirectoryRecursively(
- MakeIntegerIndexWorkingPath(options_.base_dir()).c_str())) {
- return absl_ports::InternalError("Failed to discard integer index");
+ if (rebuild_result.needs_integer_index_rebuild) {
+ if (!filesystem_->DeleteDirectoryRecursively(
+ MakeIntegerIndexWorkingPath(options_.base_dir()).c_str())) {
+ return absl_ports::InternalError("Failed to discard integer index");
+ }
}
// Qualified id join index
- if (!filesystem_->DeleteDirectoryRecursively(
- MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir()).c_str())) {
- return absl_ports::InternalError(
- "Failed to discard qualified id join index");
+ if (rebuild_result.needs_qualified_id_join_index_rebuild) {
+ if (!filesystem_->DeleteDirectoryRecursively(
+ MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir()).c_str())) {
+ return absl_ports::InternalError(
+ "Failed to discard qualified id join index");
+ }
}
return libtextclassifier3::Status::OK;
@@ -2816,7 +2871,8 @@ SuggestionResponse IcingSearchEngine::SearchSuggestions(
// Create the suggestion processor.
auto suggestion_processor_or = SuggestionProcessor::Create(
index_.get(), integer_index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(), schema_store_.get());
+ normalizer_.get(), document_store_.get(), schema_store_.get(),
+ clock_.get());
if (!suggestion_processor_or.ok()) {
TransformStatus(suggestion_processor_or.status(), response_status);
return response;
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index d316350..b9df95d 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -17,7 +17,6 @@
#include <cstdint>
#include <memory>
-#include <string>
#include <string_view>
#include <utility>
#include <vector>
@@ -27,6 +26,7 @@
#include "icing/absl_ports/mutex.h"
#include "icing/absl_ports/thread_annotations.h"
#include "icing/file/filesystem.h"
+#include "icing/file/version-util.h"
#include "icing/index/data-indexing-handler.h"
#include "icing/index/index.h"
#include "icing/index/numeric/numeric-index.h"
@@ -51,11 +51,11 @@
#include "icing/result/result-state-manager.h"
#include "icing/schema/schema-store.h"
#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
#include "icing/util/clock.h"
-#include "icing/util/crc32.h"
namespace icing {
namespace lib {
@@ -578,8 +578,8 @@ class IcingSearchEngine {
// read-lock, allowing for parallel non-exclusive operations.
// This implementation is used if search_spec.use_read_only_search is true.
SearchResultProto SearchLockedShared(const SearchSpecProto& search_spec,
- const ScoringSpecProto& scoring_spec,
- const ResultSpecProto& result_spec)
+ const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec)
ICING_LOCKS_EXCLUDED(mutex_);
// Implementation of IcingSearchEngine::Search that requires the overall
@@ -587,8 +587,8 @@ class IcingSearchEngine {
// this version is used.
// This implementation is used if search_spec.use_read_only_search is false.
SearchResultProto SearchLockedExclusive(const SearchSpecProto& search_spec,
- const ScoringSpecProto& scoring_spec,
- const ResultSpecProto& result_spec)
+ const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec)
ICING_LOCKS_EXCLUDED(mutex_);
// Helper method for the actual work to Search. We need this separate
@@ -641,13 +641,14 @@ class IcingSearchEngine {
libtextclassifier3::Status CheckConsistency()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Discards all derived data.
+ // Discards derived data that requires rebuild based on rebuild_result.
//
// Returns:
// OK on success
// FAILED_PRECONDITION_ERROR if those instances are valid (non nullptr)
// INTERNAL_ERROR on any I/O errors
- libtextclassifier3::Status DiscardDerivedFiles()
+ libtextclassifier3::Status DiscardDerivedFiles(
+ const version_util::DerivedFilesRebuildResult& rebuild_result)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Repopulates derived data off our ground truths.
diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc
index 122e4af..d6316d4 100644
--- a/icing/icing-search-engine_initialization_test.cc
+++ b/icing/icing-search-engine_initialization_test.cc
@@ -19,6 +19,7 @@
#include <string>
#include <string_view>
#include <tuple>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -204,7 +205,7 @@ class IcingSearchEngineInitializationTest : public testing::Test {
// Non-zero value so we don't override it to be the current time
constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
-std::string GetVersionFilename() { return GetTestBaseDir() + "/version"; }
+std::string GetVersionFileDir() { return GetTestBaseDir(); }
std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; }
@@ -5566,12 +5567,26 @@ INSTANTIATE_TEST_SUITE_P(IcingSearchEngineInitializationSwitchJoinIndexTest,
IcingSearchEngineInitializationSwitchJoinIndexTest,
testing::Values(true, false));
+struct IcingSearchEngineInitializationVersionChangeTestParam {
+ version_util::VersionInfo existing_version_info;
+ std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
+ existing_enabled_features;
+
+ explicit IcingSearchEngineInitializationVersionChangeTestParam(
+ version_util::VersionInfo version_info_in,
+ std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
+ existing_enabled_features_in)
+ : existing_version_info(std::move(version_info_in)),
+ existing_enabled_features(std::move(existing_enabled_features_in)) {}
+};
+
class IcingSearchEngineInitializationVersionChangeTest
: public IcingSearchEngineInitializationTest,
- public ::testing::WithParamInterface<version_util::VersionInfo> {};
+ public ::testing::WithParamInterface<
+ IcingSearchEngineInitializationVersionChangeTestParam> {};
TEST_P(IcingSearchEngineInitializationVersionChangeTest,
- RecoverFromVersionChange) {
+ RecoverFromVersionChangeOrUnknownFlagChange) {
// TODO(b/280697513): test backup schema migration
// Test the following scenario: version change. All derived data should be
// rebuilt. We test this by manually adding some invalid derived data and
@@ -5725,10 +5740,27 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest,
std::move(incorrect_message)));
ICING_ASSERT_OK(index_processor.IndexDocument(tokenized_document, doc_id));
- // Change existing data's version file
- const version_util::VersionInfo& existing_version_info = GetParam();
- ICING_ASSERT_OK(version_util::WriteVersion(
- *filesystem(), GetVersionFilename(), existing_version_info));
+ // Rewrite existing data's version files
+ ICING_ASSERT_OK(
+ version_util::DiscardVersionFiles(*filesystem(), GetVersionFileDir()));
+ const version_util::VersionInfo& existing_version_info =
+ GetParam().existing_version_info;
+ ICING_ASSERT_OK(version_util::WriteV1Version(
+ *filesystem(), GetVersionFileDir(), existing_version_info));
+
+ if (existing_version_info.version >= version_util::kFirstV2Version) {
+ IcingSearchEngineVersionProto version_proto;
+ version_proto.set_version(existing_version_info.version);
+ version_proto.set_max_version(existing_version_info.max_version);
+ auto* enabled_features = version_proto.mutable_enabled_features();
+ for (const auto& feature : GetParam().existing_enabled_features) {
+ enabled_features->Add(version_util::GetFeatureInfoProto(feature));
+ }
+ version_util::WriteV2Version(
+ *filesystem(), GetVersionFileDir(),
+ std::make_unique<IcingSearchEngineVersionProto>(
+ std::move(version_proto)));
+ }
}
// Mock filesystem to observe and check the behavior of all indices.
@@ -5738,28 +5770,48 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest,
std::make_unique<FakeClock>(), GetTestJniCache());
InitializeResultProto initialize_result = icing.Initialize();
EXPECT_THAT(initialize_result.status(), ProtoIsOk());
- // Index Restoration should be triggered here. Incorrect data should be
- // deleted and correct data of message should be indexed.
+
+ // Derived files restoration should be triggered here. Incorrect data should
+ // be deleted and correct data of message should be indexed.
+ // Here we're recovering from a version change or a flag change that requires
+ // rebuilding all derived files.
+ //
+ // TODO(b/314816301): test individual derived files rebuilds due to change
+ // in trunk stable feature flags.
+ // i.e. Test individual rebuilding for each of:
+ // - document store
+ // - schema store
+ // - term index
+ // - numeric index
+ // - qualified id join index
+ InitializeStatsProto::RecoveryCause expected_recovery_cause =
+ GetParam().existing_version_info.version != version_util::kVersion
+ ? InitializeStatsProto::VERSION_CHANGED
+ : InitializeStatsProto::FEATURE_FLAG_CHANGED;
EXPECT_THAT(
initialize_result.initialize_stats().document_store_recovery_cause(),
- Eq(InitializeStatsProto::VERSION_CHANGED));
+ Eq(expected_recovery_cause));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(expected_recovery_cause));
EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::VERSION_CHANGED));
+ Eq(expected_recovery_cause));
EXPECT_THAT(
initialize_result.initialize_stats().integer_index_restoration_cause(),
- Eq(InitializeStatsProto::VERSION_CHANGED));
+ Eq(expected_recovery_cause));
EXPECT_THAT(initialize_result.initialize_stats()
.qualified_id_join_index_restoration_cause(),
- Eq(InitializeStatsProto::VERSION_CHANGED));
+ Eq(expected_recovery_cause));
// Manually check version file
ICING_ASSERT_OK_AND_ASSIGN(
- version_util::VersionInfo version_info_after_init,
- version_util::ReadVersion(*filesystem(), GetVersionFilename(),
+ IcingSearchEngineVersionProto version_proto_after_init,
+ version_util::ReadVersion(*filesystem(), GetVersionFileDir(),
GetIndexDir()));
- EXPECT_THAT(version_info_after_init.version, Eq(version_util::kVersion));
- EXPECT_THAT(version_info_after_init.max_version,
- Eq(std::max(version_util::kVersion, GetParam().max_version)));
+ EXPECT_THAT(version_proto_after_init.version(), Eq(version_util::kVersion));
+ EXPECT_THAT(version_proto_after_init.max_version(),
+ Eq(std::max(version_util::kVersion,
+ GetParam().existing_version_info.max_version)));
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -5836,9 +5888,11 @@ INSTANTIATE_TEST_SUITE_P(
testing::Values(
// Manually change existing data set's version to kVersion + 1. When
// initializing, it will detect "rollback".
- version_util::VersionInfo(
- /*version_in=*/version_util::kVersion + 1,
- /*max_version_in=*/version_util::kVersion + 1),
+ IcingSearchEngineInitializationVersionChangeTestParam(
+ version_util::VersionInfo(
+ /*version_in=*/version_util::kVersion + 1,
+ /*max_version_in=*/version_util::kVersion + 1),
+ /*existing_enabled_features_in=*/{}),
// Currently we don't have any "upgrade" that requires rebuild derived
// files, so skip this case until we have a case for it.
@@ -5846,27 +5900,45 @@ INSTANTIATE_TEST_SUITE_P(
// Manually change existing data set's version to kVersion - 1 and
// max_version to kVersion. When initializing, it will detect "roll
// forward".
- version_util::VersionInfo(
- /*version_in=*/version_util::kVersion - 1,
- /*max_version_in=*/version_util::kVersion),
+ IcingSearchEngineInitializationVersionChangeTestParam(
+ version_util::VersionInfo(
+ /*version_in=*/version_util::kVersion - 1,
+ /*max_version_in=*/version_util::kVersion),
+ /*existing_enabled_features_in=*/{}),
// Manually change existing data set's version to 0 and max_version to
// 0. When initializing, it will detect "version 0 upgrade".
//
// Note: in reality, version 0 won't be written into version file, but
// it is ok here since it is hack to simulate version 0 situation.
- version_util::VersionInfo(
- /*version_in=*/0,
- /*max_version_in=*/0),
+ IcingSearchEngineInitializationVersionChangeTestParam(
+ version_util::VersionInfo(
+ /*version_in=*/0,
+ /*max_version_in=*/0),
+ /*existing_enabled_features_in=*/{}),
// Manually change existing data set's version to 0 and max_version to
// kVersion. When initializing, it will detect "version 0 roll forward".
//
// Note: in reality, version 0 won't be written into version file, but
// it is ok here since it is hack to simulate version 0 situation.
- version_util::VersionInfo(
- /*version_in=*/0,
- /*max_version_in=*/version_util::kVersion)));
+ IcingSearchEngineInitializationVersionChangeTestParam(
+ version_util::VersionInfo(
+ /*version_in=*/0,
+ /*max_version_in=*/version_util::kVersion),
+ /*existing_enabled_features_in=*/{}),
+
+ // Manually write an unknown feature in the version proto while keeping
+ // version the same as kVersion.
+ //
+ // Result: this will rebuild all derived files with restoration cause
+ // FEATURE_FLAG_CHANGED
+ IcingSearchEngineInitializationVersionChangeTestParam(
+ version_util::VersionInfo(
+ /*version_in=*/version_util::kVersion,
+ /*max_version_in=*/version_util::kVersion),
+ /*existing_enabled_features_in=*/{
+ IcingSearchEngineFeatureInfoProto::UNKNOWN})));
class IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest
: public IcingSearchEngineInitializationTest,
@@ -5962,7 +6034,7 @@ TEST_P(IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
ASSERT_THAT(initialize_result.status(), ProtoIsOk());
// Ensure that the term index is rebuilt if the flag is changed.
EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
- Eq(flag_changed ? InitializeStatsProto::IO_ERROR
+ Eq(flag_changed ? InitializeStatsProto::FEATURE_FLAG_CHANGED
: InitializeStatsProto::NONE));
EXPECT_THAT(
initialize_result.initialize_stats().integer_index_restoration_cause(),
diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc
index 21512c6..22f42e7 100644
--- a/icing/icing-search-engine_search_test.cc
+++ b/icing/icing-search-engine_search_test.cc
@@ -4496,6 +4496,11 @@ TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
exp_stats.set_document_retrieval_latency_ms(5);
exp_stats.set_lock_acquisition_latency_ms(5);
exp_stats.set_num_joined_results_returned_current_page(0);
+ // document4, document5's hits will remain in the lite index (# of hits: 4).
+ exp_stats.set_lite_index_hit_buffer_byte_size(4 *
+ sizeof(TermIdHitPair::Value));
+ exp_stats.set_lite_index_hit_buffer_unsorted_byte_size(
+ 4 * sizeof(TermIdHitPair::Value));
QueryStatsProto::SearchStats* exp_parent_search_stats =
exp_stats.mutable_parent_search_stats();
@@ -4511,6 +4516,14 @@ TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
exp_parent_search_stats->set_num_fetched_hits_lite_index(2);
exp_parent_search_stats->set_num_fetched_hits_main_index(3);
exp_parent_search_stats->set_num_fetched_hits_integer_index(0);
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ exp_parent_search_stats->set_query_processor_lexer_extract_token_latency_ms(
+ 5);
+ exp_parent_search_stats
+ ->set_query_processor_parser_consume_query_latency_ms(5);
+ exp_parent_search_stats->set_query_processor_query_visitor_latency_ms(5);
+ }
EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
@@ -4769,6 +4782,11 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
exp_stats.set_num_joined_results_returned_current_page(3);
exp_stats.set_join_latency_ms(5);
exp_stats.set_is_join_query(true);
+ // person3, email4's hits will remain in the lite index (# of hits: 5).
+ exp_stats.set_lite_index_hit_buffer_byte_size(5 *
+ sizeof(TermIdHitPair::Value));
+ exp_stats.set_lite_index_hit_buffer_unsorted_byte_size(
+ 5 * sizeof(TermIdHitPair::Value));
QueryStatsProto::SearchStats* exp_parent_search_stats =
exp_stats.mutable_parent_search_stats();
@@ -4784,6 +4802,14 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
exp_parent_search_stats->set_num_fetched_hits_lite_index(1);
exp_parent_search_stats->set_num_fetched_hits_main_index(2);
exp_parent_search_stats->set_num_fetched_hits_integer_index(0);
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ exp_parent_search_stats->set_query_processor_lexer_extract_token_latency_ms(
+ 5);
+ exp_parent_search_stats
+ ->set_query_processor_parser_consume_query_latency_ms(5);
+ exp_parent_search_stats->set_query_processor_query_visitor_latency_ms(5);
+ }
QueryStatsProto::SearchStats* exp_child_search_stats =
exp_stats.mutable_child_search_stats();
@@ -4799,6 +4825,14 @@ TEST_P(IcingSearchEngineSearchTest, JoinQueryStatsProtoTest) {
exp_child_search_stats->set_num_fetched_hits_lite_index(1);
exp_child_search_stats->set_num_fetched_hits_main_index(3);
exp_child_search_stats->set_num_fetched_hits_integer_index(0);
+ if (GetParam() ==
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ exp_child_search_stats->set_query_processor_lexer_extract_token_latency_ms(
+ 5);
+ exp_child_search_stats->set_query_processor_parser_consume_query_latency_ms(
+ 5);
+ exp_child_search_stats->set_query_processor_query_visitor_latency_ms(5);
+ }
EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
@@ -6638,6 +6672,8 @@ TEST_F(IcingSearchEngineSearchTest, NumericFilterQueryStatsProtoTest) {
exp_stats.set_document_retrieval_latency_ms(5);
exp_stats.set_lock_acquisition_latency_ms(5);
exp_stats.set_num_joined_results_returned_current_page(0);
+ exp_stats.set_lite_index_hit_buffer_byte_size(0);
+ exp_stats.set_lite_index_hit_buffer_unsorted_byte_size(0);
QueryStatsProto::SearchStats* exp_parent_search_stats =
exp_stats.mutable_parent_search_stats();
@@ -6656,6 +6692,11 @@ TEST_F(IcingSearchEngineSearchTest, NumericFilterQueryStatsProtoTest) {
// Since we will inspect 1 bucket from "price" in integer index and it
// contains 3 hits, we will fetch 3 hits (but filter out one of them).
exp_parent_search_stats->set_num_fetched_hits_integer_index(3);
+ exp_parent_search_stats->set_query_processor_lexer_extract_token_latency_ms(
+ 5);
+ exp_parent_search_stats->set_query_processor_parser_consume_query_latency_ms(
+ 5);
+ exp_parent_search_stats->set_query_processor_query_visitor_latency_ms(5);
EXPECT_THAT(results.query_stats(), EqualsProto(exp_stats));
}
diff --git a/icing/index/index.cc b/icing/index/index.cc
index 98058be..31dcc7e 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -65,10 +65,10 @@ libtextclassifier3::StatusOr<LiteIndex::Options> CreateLiteIndexOptions(
"Requested hit buffer size %d is too large.",
options.index_merge_size));
}
- return LiteIndex::Options(
- options.base_dir + "/idx/lite.", options.index_merge_size,
- options.lite_index_sort_at_indexing, options.lite_index_sort_size,
- options.include_property_existence_metadata_hits);
+ return LiteIndex::Options(options.base_dir + "/idx/lite.",
+ options.index_merge_size,
+ options.lite_index_sort_at_indexing,
+ options.lite_index_sort_size);
}
std::string MakeMainIndexFilepath(const std::string& base_dir) {
diff --git a/icing/index/index.h b/icing/index/index.h
index a5d75c4..a09e28f 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -35,6 +35,7 @@
#include "icing/index/term-metadata.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/proto/debug.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
@@ -72,20 +73,16 @@ class Index {
struct Options {
explicit Options(const std::string& base_dir, uint32_t index_merge_size,
bool lite_index_sort_at_indexing,
- uint32_t lite_index_sort_size,
- bool include_property_existence_metadata_hits = false)
+ uint32_t lite_index_sort_size)
: base_dir(base_dir),
index_merge_size(index_merge_size),
lite_index_sort_at_indexing(lite_index_sort_at_indexing),
- lite_index_sort_size(lite_index_sort_size),
- include_property_existence_metadata_hits(
- include_property_existence_metadata_hits) {}
+ lite_index_sort_size(lite_index_sort_size) {}
std::string base_dir;
int32_t index_merge_size;
bool lite_index_sort_at_indexing;
int32_t lite_index_sort_size;
- bool include_property_existence_metadata_hits;
};
// Creates an instance of Index in the directory pointed by file_dir.
@@ -178,6 +175,13 @@ class Index {
return debug_info;
}
+ void PublishQueryStats(QueryStatsProto* query_stats) const {
+ query_stats->set_lite_index_hit_buffer_byte_size(
+ lite_index_->GetHitBufferByteSize());
+ query_stats->set_lite_index_hit_buffer_unsorted_byte_size(
+ lite_index_->GetHitBufferUnsortedByteSize());
+ }
+
// Returns the byte size of the all the elements held in the index. This
// excludes the size of any internal metadata of the index, e.g. the index's
// header.
@@ -301,9 +305,7 @@ class Index {
}
// Sorts the LiteIndex HitBuffer.
- void SortLiteIndex() {
- lite_index_->SortHits();
- }
+ void SortLiteIndex() { lite_index_->SortHits(); }
// Reduces internal file sizes by reclaiming space of deleted documents.
// new_last_added_document_id will be used to update the last added document
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index 04a6bb7..ab550e1 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -33,9 +33,11 @@
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/lite/term-id-hit-pair.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
#include "icing/proto/debug.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
@@ -2739,6 +2741,41 @@ TEST_F(IndexTest, IndexStorageInfoProto) {
EXPECT_THAT(storage_info.min_free_fraction(), Ge(0));
}
+TEST_F(IndexTest, PublishQueryStats) {
+ // Add two documents to the lite index without merging.
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foul"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ // Verify query stats.
+ QueryStatsProto query_stats1;
+ index_->PublishQueryStats(&query_stats1);
+ EXPECT_THAT(query_stats1.lite_index_hit_buffer_byte_size(),
+ Eq(2 * sizeof(TermIdHitPair::Value)));
+ EXPECT_THAT(query_stats1.lite_index_hit_buffer_unsorted_byte_size(),
+ Ge(2 * sizeof(TermIdHitPair::Value)));
+
+ // Sort lite index.
+ index_->SortLiteIndex();
+ QueryStatsProto query_stats2;
+ index_->PublishQueryStats(&query_stats2);
+ EXPECT_THAT(query_stats2.lite_index_hit_buffer_byte_size(),
+ Eq(2 * sizeof(TermIdHitPair::Value)));
+ EXPECT_THAT(query_stats2.lite_index_hit_buffer_unsorted_byte_size(), Eq(0));
+
+ // Merge lite index to main index.
+ ICING_ASSERT_OK(index_->Merge());
+ QueryStatsProto query_stats3;
+ index_->PublishQueryStats(&query_stats3);
+ EXPECT_THAT(query_stats3.lite_index_hit_buffer_byte_size(), Eq(0));
+ EXPECT_THAT(query_stats3.lite_index_hit_buffer_unsorted_byte_size(), Eq(0));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/lite/lite-index-header.h b/icing/index/lite/lite-index-header.h
index 75de8fa..6208cb6 100644
--- a/icing/index/lite/lite-index-header.h
+++ b/icing/index/lite/lite-index-header.h
@@ -53,14 +53,7 @@ class LiteIndex_Header {
class LiteIndex_HeaderImpl : public LiteIndex_Header {
public:
struct HeaderData {
- static uint32_t GetCurrentMagic(
- bool include_property_existence_metadata_hits) {
- if (!include_property_existence_metadata_hits) {
- return 0x01c61418;
- } else {
- return 0x56e07d5b;
- }
- }
+ static const uint32_t kMagic = 0x01c61418;
uint32_t lite_index_crc;
uint32_t magic;
@@ -76,15 +69,10 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header {
uint32_t searchable_end;
};
- explicit LiteIndex_HeaderImpl(HeaderData *hdr,
- bool include_property_existence_metadata_hits)
- : hdr_(hdr),
- include_property_existence_metadata_hits_(
- include_property_existence_metadata_hits) {}
+ explicit LiteIndex_HeaderImpl(HeaderData *hdr) : hdr_(hdr) {}
bool check_magic() const override {
- return hdr_->magic == HeaderData::GetCurrentMagic(
- include_property_existence_metadata_hits_);
+ return hdr_->magic == HeaderData::kMagic;
}
uint32_t lite_index_crc() const override { return hdr_->lite_index_crc; }
@@ -111,8 +99,7 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header {
void Reset() override {
hdr_->lite_index_crc = 0;
- hdr_->magic =
- HeaderData::GetCurrentMagic(include_property_existence_metadata_hits_);
+ hdr_->magic = HeaderData::kMagic;
hdr_->last_added_docid = kInvalidDocumentId;
hdr_->cur_size = 0;
hdr_->searchable_end = 0;
@@ -120,7 +107,6 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header {
private:
HeaderData *hdr_;
- bool include_property_existence_metadata_hits_;
};
static_assert(24 == sizeof(LiteIndex_HeaderImpl::HeaderData),
"sizeof(HeaderData) != 24");
diff --git a/icing/index/lite/lite-index-options.cc b/icing/index/lite/lite-index-options.cc
index 7e6c076..b4810ea 100644
--- a/icing/index/lite/lite-index-options.cc
+++ b/icing/index/lite/lite-index-options.cc
@@ -69,16 +69,14 @@ IcingDynamicTrie::Options CalculateTrieOptions(uint32_t hit_buffer_size) {
} // namespace
-LiteIndexOptions::LiteIndexOptions(
- const std::string& filename_base, uint32_t hit_buffer_want_merge_bytes,
- bool hit_buffer_sort_at_indexing, uint32_t hit_buffer_sort_threshold_bytes,
- bool include_property_existence_metadata_hits)
+LiteIndexOptions::LiteIndexOptions(const std::string& filename_base,
+ uint32_t hit_buffer_want_merge_bytes,
+ bool hit_buffer_sort_at_indexing,
+ uint32_t hit_buffer_sort_threshold_bytes)
: filename_base(filename_base),
hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes),
hit_buffer_sort_at_indexing(hit_buffer_sort_at_indexing),
- hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes),
- include_property_existence_metadata_hits(
- include_property_existence_metadata_hits) {
+ hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes) {
hit_buffer_size = CalculateHitBufferSize(hit_buffer_want_merge_bytes);
lexicon_options = CalculateTrieOptions(hit_buffer_size);
display_mappings_options = CalculateTrieOptions(hit_buffer_size);
diff --git a/icing/index/lite/lite-index-options.h b/icing/index/lite/lite-index-options.h
index 8b03449..5eae5c7 100644
--- a/icing/index/lite/lite-index-options.h
+++ b/icing/index/lite/lite-index-options.h
@@ -32,8 +32,7 @@ struct LiteIndexOptions {
LiteIndexOptions(const std::string& filename_base,
uint32_t hit_buffer_want_merge_bytes,
bool hit_buffer_sort_at_indexing,
- uint32_t hit_buffer_sort_threshold_bytes,
- bool include_property_existence_metadata_hits = false);
+ uint32_t hit_buffer_sort_threshold_bytes);
IcingDynamicTrie::Options lexicon_options;
IcingDynamicTrie::Options display_mappings_options;
@@ -43,7 +42,6 @@ struct LiteIndexOptions {
uint32_t hit_buffer_size = 0;
bool hit_buffer_sort_at_indexing = false;
uint32_t hit_buffer_sort_threshold_bytes = 0;
- bool include_property_existence_metadata_hits = false;
};
} // namespace lib
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index 3f9cc93..4c5cb37 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -168,8 +168,7 @@ libtextclassifier3::Status LiteIndex::Initialize() {
header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
header_ = std::make_unique<LiteIndex_HeaderImpl>(
reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
- header_mmap_.address()),
- options_.include_property_existence_metadata_hits);
+ header_mmap_.address()));
header_->Reset();
if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
@@ -184,8 +183,7 @@ libtextclassifier3::Status LiteIndex::Initialize() {
header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
header_ = std::make_unique<LiteIndex_HeaderImpl>(
reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
- header_mmap_.address()),
- options_.include_property_existence_metadata_hits);
+ header_mmap_.address()));
if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
sizeof(TermIdHitPair::Value), header_->cur_size(),
@@ -499,7 +497,7 @@ int LiteIndex::FetchHits(
// When disabled, the entire HitBuffer should be sorted already and only
// binary search is needed.
if (options_.hit_buffer_sort_at_indexing) {
- uint32_t unsorted_length = header_->cur_size() - header_->searchable_end();
+ uint32_t unsorted_length = GetHitBufferUnsortedSizeImpl();
for (uint32_t i = 1; i <= unsorted_length; ++i) {
TermIdHitPair term_id_hit_pair = array[header_->cur_size() - i];
if (term_id_hit_pair.term_id() == term_id) {
@@ -607,13 +605,13 @@ IndexStorageInfoProto LiteIndex::GetStorageInfo(
void LiteIndex::SortHitsImpl() {
// Make searchable by sorting by hit buffer.
- uint32_t sort_len = header_->cur_size() - header_->searchable_end();
- if (sort_len <= 0) {
+ uint32_t need_sort_len = GetHitBufferUnsortedSizeImpl();
+ if (need_sort_len <= 0) {
return;
}
IcingTimer timer;
- auto* array_start =
+ TermIdHitPair::Value* array_start =
hit_buffer_.GetMutableMem<TermIdHitPair::Value>(0, header_->cur_size());
TermIdHitPair::Value* sort_start = array_start + header_->searchable_end();
std::sort(sort_start, array_start + header_->cur_size());
@@ -625,7 +623,7 @@ void LiteIndex::SortHitsImpl() {
std::inplace_merge(array_start, array_start + header_->searchable_end(),
array_start + header_->cur_size());
}
- ICING_VLOG(2) << "Lite index sort and merge " << sort_len << " into "
+ ICING_VLOG(2) << "Lite index sort and merge " << need_sort_len << " into "
<< header_->searchable_end() << " in " << timer.Elapsed() * 1000
<< "ms";
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
index 288602a..45dc280 100644
--- a/icing/index/lite/lite-index.h
+++ b/icing/index/lite/lite-index.h
@@ -308,6 +308,24 @@ class LiteIndex {
IndexStorageInfoProto GetStorageInfo(IndexStorageInfoProto storage_info) const
ICING_LOCKS_EXCLUDED(mutex_);
+ // Returns the size of unsorted part of hit_buffer_.
+ uint32_t GetHitBufferByteSize() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return size_impl() * sizeof(TermIdHitPair::Value);
+ }
+
+ // Returns the size of unsorted part of hit_buffer_.
+ uint32_t GetHitBufferUnsortedSize() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return GetHitBufferUnsortedSizeImpl();
+ }
+
+ // Returns the byte size of unsorted part of hit_buffer_.
+ uint64_t GetHitBufferUnsortedByteSize() const ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return GetHitBufferUnsortedSizeImpl() * sizeof(TermIdHitPair::Value);
+ }
+
// Reduces internal file sizes by reclaiming space of deleted documents.
//
// This method also sets the last_added_docid of the index to
@@ -377,13 +395,13 @@ class LiteIndex {
bool NeedSortAtQuerying() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
return HasUnsortedHitsExceedingSortThresholdImpl() ||
(!options_.hit_buffer_sort_at_indexing &&
- header_->cur_size() - header_->searchable_end() > 0);
+ GetHitBufferUnsortedSizeImpl() > 0);
}
// Non-locking implementation for HasUnsortedHitsExceedingSortThresholdImpl().
bool HasUnsortedHitsExceedingSortThresholdImpl() const
ICING_SHARED_LOCKS_REQUIRED(mutex_) {
- return header_->cur_size() - header_->searchable_end() >=
+ return GetHitBufferUnsortedSizeImpl() >=
(options_.hit_buffer_sort_threshold_bytes /
sizeof(TermIdHitPair::Value));
}
@@ -408,6 +426,12 @@ class LiteIndex {
std::vector<Hit::TermFrequencyArray>* term_frequency_out) const
ICING_SHARED_LOCKS_REQUIRED(mutex_);
+ // Returns the size of unsorted part of hit_buffer_.
+ uint32_t GetHitBufferUnsortedSizeImpl() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return header_->cur_size() - header_->searchable_end();
+ }
+
// File descriptor that points to where the header and hit buffer are written
// to.
ScopedFd hit_buffer_fd_ ICING_GUARDED_BY(mutex_);
diff --git a/icing/index/lite/lite-index_test.cc b/icing/index/lite/lite-index_test.cc
index 9811fa2..7ff006e 100644
--- a/icing/index/lite/lite-index_test.cc
+++ b/icing/index/lite/lite-index_test.cc
@@ -28,6 +28,7 @@
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
#include "icing/index/lite/lite-index-header.h"
+#include "icing/index/lite/term-id-hit-pair.h"
#include "icing/index/term-id-codec.h"
#include "icing/legacy/index/icing-dynamic-trie.h"
#include "icing/legacy/index/icing-filesystem.h"
@@ -49,6 +50,7 @@ using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::IsFalse;
using ::testing::IsTrue;
+using ::testing::Pointee;
using ::testing::SizeIs;
class LiteIndexTest : public testing::Test {
@@ -113,18 +115,12 @@ TEST_F(LiteIndexTest,
ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0));
ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1));
+ // Check the total size and unsorted size of the hit buffer.
+ EXPECT_THAT(lite_index, Pointee(SizeIs(4)));
+ EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(4));
// Check that unsorted hits does not exceed the sort threshold.
EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
- // Check that hits are unsorted. Persist the data and pread from
- // LiteIndexHeader.
- ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
- LiteIndex_HeaderImpl::HeaderData header_data;
- ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
- &header_data, sizeof(header_data),
- LiteIndex::kHeaderFileOffset));
- EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
-
// Query the LiteIndex
std::vector<DocHitInfo> hits1;
lite_index->FetchHits(
@@ -148,13 +144,10 @@ TEST_F(LiteIndexTest,
// checker.
EXPECT_THAT(hits2, IsEmpty());
- // Check that hits are sorted after querying LiteIndex. Persist the data and
- // pread from LiteIndexHeader.
- ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
- ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
- &header_data, sizeof(header_data),
- LiteIndex::kHeaderFileOffset));
- EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
+ // Check the total size and unsorted size of the hit buffer. Hits should be
+ // sorted after querying LiteIndex.
+ EXPECT_THAT(lite_index, Pointee(SizeIs(4)));
+ EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(0));
}
TEST_F(LiteIndexTest,
@@ -203,18 +196,12 @@ TEST_F(LiteIndexTest,
ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0));
ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1));
+ // Check the total size and unsorted size of the hit buffer.
+ EXPECT_THAT(lite_index, Pointee(SizeIs(4)));
+ EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(4));
// Check that unsorted hits does not exceed the sort threshold.
EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
- // Check that hits are unsorted. Persist the data and pread from
- // LiteIndexHeader.
- ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
- LiteIndex_HeaderImpl::HeaderData header_data;
- ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
- &header_data, sizeof(header_data),
- LiteIndex::kHeaderFileOffset));
- EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
-
// Query the LiteIndex
std::vector<DocHitInfo> hits1;
lite_index->FetchHits(
@@ -238,15 +225,11 @@ TEST_F(LiteIndexTest,
// checker.
EXPECT_THAT(hits2, IsEmpty());
- // Check that hits are still unsorted after querying LiteIndex because the
- // HitBuffer unsorted size is still below the sort threshold, and we've
- // enabled sort_at_indexing.
- // Persist the data and performing a pread on LiteIndexHeader.
- ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
- ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
- &header_data, sizeof(header_data),
- LiteIndex::kHeaderFileOffset));
- EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
+ // Check the total size and unsorted size of the hit buffer. Hits should be
+ // still unsorted after querying LiteIndex because the HitBuffer unsorted size
+ // is still below the sort threshold, and we've enabled sort_at_indexing.
+ EXPECT_THAT(lite_index, Pointee(SizeIs(4)));
+ EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(4));
}
TEST_F(
@@ -348,7 +331,10 @@ TEST_F(
ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit3));
ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit0));
ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc3_hit1));
- // Verify that the HitBuffer has not been sorted.
+ // Check the total size and unsorted size of the hit buffer. The HitBuffer has
+ // not been sorted.
+ EXPECT_THAT(lite_index, Pointee(SizeIs(14)));
+ EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(14));
EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
// We now have the following in the hit buffer:
@@ -400,7 +386,10 @@ TEST_F(
EXPECT_THAT(hits3[1].document_id(), Eq(0));
EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
- // Check that the HitBuffer is sorted after the query call.
+ // Check the total size and unsorted size of the hit buffer. The HitBuffer
+ // should be sorted after the query call.
+ EXPECT_THAT(lite_index, Pointee(SizeIs(14)));
+ EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(0));
EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
}
@@ -511,13 +500,11 @@ TEST_F(
// AddHit() itself, we need to invoke SortHits() manually.
EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
lite_index->SortHits();
- // Check that the HitBuffer is sorted.
- ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
- LiteIndex_HeaderImpl::HeaderData header_data;
- ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
- &header_data, sizeof(header_data),
- LiteIndex::kHeaderFileOffset));
- EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
+ // Check the total size and unsorted size of the hit buffer. The HitBuffer
+ // should be sorted after calling SortHits().
+ EXPECT_THAT(lite_index, Pointee(SizeIs(8)));
+ EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(0));
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
// Add 12 more hits so that sort threshold is exceeded again.
ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit0));
@@ -536,6 +523,8 @@ TEST_F(
// Adding these hits exceeds the sort threshold. However when sort_at_indexing
// is enabled, sorting is done in the string-section-indexing-handler rather
// than AddHit() itself.
+ EXPECT_THAT(lite_index, Pointee(SizeIs(20)));
+ EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(12));
EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
// We now have the following in the hit buffer:
@@ -589,15 +578,13 @@ TEST_F(
EXPECT_THAT(hits3[1].document_id(), Eq(0));
EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
- // Check that the HitBuffer is sorted after the query call. FetchHits should
- // sort before performing binary search if the HitBuffer unsorted size exceeds
- // the sort threshold. Regardless of the sort_at_indexing config.
+ // Check the total size and unsorted size of the hit buffer. FetchHits should
+ // sort before performing search if the HitBuffer unsorted size exceeds the
+ // sort threshold, regardless of the sort_at_indexing config (to avoid
+ // sequential search on an extremely long unsorted tails).
+ EXPECT_THAT(lite_index, Pointee(SizeIs(20)));
+ EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(0));
EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
- ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
- ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
- &header_data, sizeof(header_data),
- LiteIndex::kHeaderFileOffset));
- EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
}
TEST_F(LiteIndexTest, LiteIndexIterator) {
@@ -736,6 +723,53 @@ TEST_F(LiteIndexTest, LiteIndexIterator_sortAtIndexingDisabled) {
term, expected_section_ids_tf_map0)));
}
+TEST_F(LiteIndexTest, LiteIndexHitBufferSize) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ const std::string term = "foo";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index->InsertTerm(term, TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ Hit hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/3,
+ /*is_in_prefix_section=*/false);
+ Hit hit1(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/5,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, hit1));
+
+ // Check the total size and byte size of the hit buffer.
+ EXPECT_THAT(lite_index, Pointee(SizeIs(2)));
+ EXPECT_THAT(lite_index->GetHitBufferByteSize(),
+ Eq(2 * sizeof(TermIdHitPair::Value)));
+ // Check the unsorted size and byte size of the hit buffer.
+ EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(2));
+ EXPECT_THAT(lite_index->GetHitBufferUnsortedByteSize(),
+ Eq(2 * sizeof(TermIdHitPair::Value)));
+
+ // Sort the hit buffer and check again.
+ lite_index->SortHits();
+ EXPECT_THAT(lite_index, Pointee(SizeIs(2)));
+ EXPECT_THAT(lite_index->GetHitBufferByteSize(),
+ Eq(2 * sizeof(TermIdHitPair::Value)));
+ EXPECT_THAT(lite_index->GetHitBufferUnsortedSize(), Eq(0));
+ EXPECT_THAT(lite_index->GetHitBufferUnsortedByteSize(), Eq(0));
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
index bbfbf3c..74e51cf 100644
--- a/icing/query/query-processor.cc
+++ b/icing/query/query-processor.cc
@@ -14,6 +14,7 @@
#include "icing/query/query-processor.h"
+#include <cstdint>
#include <deque>
#include <memory>
#include <stack>
@@ -34,13 +35,14 @@
#include "icing/index/iterator/doc-hit-info-iterator-or.h"
#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
#include "icing/query/advanced_query_parser/lexer.h"
#include "icing/query/advanced_query_parser/parser.h"
#include "icing/query/advanced_query_parser/query-visitor.h"
#include "icing/query/query-features.h"
-#include "icing/query/query-processor.h"
#include "icing/query/query-results.h"
#include "icing/query/query-terms.h"
#include "icing/query/query-utils.h"
@@ -49,11 +51,11 @@
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
#include "icing/tokenization/language-segmenter.h"
-#include "icing/tokenization/raw-query-tokenizer.h"
#include "icing/tokenization/token.h"
#include "icing/tokenization/tokenizer-factory.h"
#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -112,17 +114,18 @@ QueryProcessor::Create(Index* index, const NumericIndex<int64_t>* numeric_index,
const LanguageSegmenter* language_segmenter,
const Normalizer* normalizer,
const DocumentStore* document_store,
- const SchemaStore* schema_store) {
+ const SchemaStore* schema_store, const Clock* clock) {
ICING_RETURN_ERROR_IF_NULL(index);
ICING_RETURN_ERROR_IF_NULL(numeric_index);
ICING_RETURN_ERROR_IF_NULL(language_segmenter);
ICING_RETURN_ERROR_IF_NULL(normalizer);
ICING_RETURN_ERROR_IF_NULL(document_store);
ICING_RETURN_ERROR_IF_NULL(schema_store);
+ ICING_RETURN_ERROR_IF_NULL(clock);
return std::unique_ptr<QueryProcessor>(
new QueryProcessor(index, numeric_index, language_segmenter, normalizer,
- document_store, schema_store));
+ document_store, schema_store, clock));
}
QueryProcessor::QueryProcessor(Index* index,
@@ -130,18 +133,20 @@ QueryProcessor::QueryProcessor(Index* index,
const LanguageSegmenter* language_segmenter,
const Normalizer* normalizer,
const DocumentStore* document_store,
- const SchemaStore* schema_store)
+ const SchemaStore* schema_store,
+ const Clock* clock)
: index_(*index),
numeric_index_(*numeric_index),
language_segmenter_(*language_segmenter),
normalizer_(*normalizer),
document_store_(*document_store),
- schema_store_(*schema_store) {}
+ schema_store_(*schema_store),
+ clock_(*clock) {}
libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
const SearchSpecProto& search_spec,
ScoringSpecProto::RankingStrategy::Code ranking_strategy,
- int64_t current_time_ms) {
+ int64_t current_time_ms, QueryStatsProto::SearchStats* search_stats) {
if (search_spec.search_type() == SearchSpecProto::SearchType::UNDEFINED) {
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
"Search type ",
@@ -152,9 +157,9 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
if (search_spec.search_type() ==
SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
ICING_VLOG(1) << "Using EXPERIMENTAL_ICING_ADVANCED_QUERY parser!";
- ICING_ASSIGN_OR_RETURN(
- results,
- ParseAdvancedQuery(search_spec, ranking_strategy, current_time_ms));
+ ICING_ASSIGN_OR_RETURN(results,
+ ParseAdvancedQuery(search_spec, ranking_strategy,
+ current_time_ms, search_stats));
} else {
ICING_ASSIGN_OR_RETURN(
results, ParseRawQuery(search_spec, ranking_strategy, current_time_ms));
@@ -188,17 +193,27 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery(
const SearchSpecProto& search_spec,
ScoringSpecProto::RankingStrategy::Code ranking_strategy,
- int64_t current_time_ms) const {
- QueryResults results;
+ int64_t current_time_ms, QueryStatsProto::SearchStats* search_stats) const {
+ std::unique_ptr<Timer> lexer_timer = clock_.GetNewTimer();
Lexer lexer(search_spec.query(), Lexer::Language::QUERY);
ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
lexer.ExtractTokens());
+ if (search_stats != nullptr) {
+ search_stats->set_query_processor_lexer_extract_token_latency_ms(
+ lexer_timer->GetElapsedMilliseconds());
+ }
+ std::unique_ptr<Timer> parser_timer = clock_.GetNewTimer();
Parser parser = Parser::Create(std::move(lexer_tokens));
ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> tree_root,
parser.ConsumeQuery());
+ if (search_stats != nullptr) {
+ search_stats->set_query_processor_parser_consume_query_latency_ms(
+ parser_timer->GetElapsedMilliseconds());
+ }
if (tree_root == nullptr) {
+ QueryResults results;
results.root_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
document_store_.last_added_document_id());
return results;
@@ -210,13 +225,22 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery(
DocHitInfoIteratorFilter::Options options = GetFilterOptions(search_spec);
bool needs_term_frequency_info =
ranking_strategy == ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE;
+
+ std::unique_ptr<Timer> query_visitor_timer = clock_.GetNewTimer();
QueryVisitor query_visitor(&index_, &numeric_index_, &document_store_,
&schema_store_, &normalizer_,
plain_tokenizer.get(), search_spec.query(),
std::move(options), search_spec.term_match_type(),
needs_term_frequency_info, current_time_ms);
tree_root->Accept(&query_visitor);
- return std::move(query_visitor).ConsumeResults();
+ ICING_ASSIGN_OR_RETURN(QueryResults results,
+ std::move(query_visitor).ConsumeResults());
+ if (search_stats != nullptr) {
+ search_stats->set_query_processor_query_visitor_latency_ms(
+ query_visitor_timer->GetElapsedMilliseconds());
+ }
+
+ return results;
}
// TODO(cassiewang): Collect query stats to populate the SearchResultsProto
diff --git a/icing/query/query-processor.h b/icing/query/query-processor.h
index d4c22dd..de256ee 100644
--- a/icing/query/query-processor.h
+++ b/icing/query/query-processor.h
@@ -20,16 +20,15 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/index.h"
-#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
-#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/numeric-index.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/query/query-results.h"
-#include "icing/query/query-terms.h"
#include "icing/schema/schema-store.h"
#include "icing/store/document-store.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -49,7 +48,8 @@ class QueryProcessor {
static libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>> Create(
Index* index, const NumericIndex<int64_t>* numeric_index,
const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
- const DocumentStore* document_store, const SchemaStore* schema_store);
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const Clock* clock);
// Parse the search configurations (including the query, any additional
// filters, etc.) in the SearchSpecProto into one DocHitInfoIterator.
@@ -68,7 +68,8 @@ class QueryProcessor {
libtextclassifier3::StatusOr<QueryResults> ParseSearch(
const SearchSpecProto& search_spec,
ScoringSpecProto::RankingStrategy::Code ranking_strategy,
- int64_t current_time_ms);
+ int64_t current_time_ms,
+ QueryStatsProto::SearchStats* search_stats = nullptr);
private:
explicit QueryProcessor(Index* index,
@@ -76,7 +77,7 @@ class QueryProcessor {
const LanguageSegmenter* language_segmenter,
const Normalizer* normalizer,
const DocumentStore* document_store,
- const SchemaStore* schema_store);
+ const SchemaStore* schema_store, const Clock* clock);
// Parse the query into a one DocHitInfoIterator that represents the root of a
// query tree in our new Advanced Query Language.
@@ -88,7 +89,8 @@ class QueryProcessor {
libtextclassifier3::StatusOr<QueryResults> ParseAdvancedQuery(
const SearchSpecProto& search_spec,
ScoringSpecProto::RankingStrategy::Code ranking_strategy,
- int64_t current_time_ms) const;
+ int64_t current_time_ms,
+ QueryStatsProto::SearchStats* search_stats) const;
// Parse the query into a one DocHitInfoIterator that represents the root of a
// query tree.
@@ -106,12 +108,13 @@ class QueryProcessor {
// Not const because we could modify/sort the hit buffer in the lite index at
// query time.
- Index& index_;
- const NumericIndex<int64_t>& numeric_index_;
- const LanguageSegmenter& language_segmenter_;
- const Normalizer& normalizer_;
- const DocumentStore& document_store_;
- const SchemaStore& schema_store_;
+ Index& index_; // Does not own.
+ const NumericIndex<int64_t>& numeric_index_; // Does not own.
+ const LanguageSegmenter& language_segmenter_; // Does not own.
+ const Normalizer& normalizer_; // Does not own.
+ const DocumentStore& document_store_; // Does not own.
+ const SchemaStore& schema_store_; // Does not own.
+ const Clock& clock_; // Does not own.
};
} // namespace lib
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
index 025e8e6..3be74fd 100644
--- a/icing/query/query-processor_benchmark.cc
+++ b/icing/query/query-processor_benchmark.cc
@@ -173,7 +173,7 @@ void BM_QueryOneTerm(benchmark::State& state) {
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index.get(), numeric_index.get(),
language_segmenter.get(), normalizer.get(),
- document_store.get(), schema_store.get()));
+ document_store.get(), schema_store.get(), &clock));
SearchSpecProto search_spec;
search_spec.set_query(input_string);
@@ -316,7 +316,7 @@ void BM_QueryFiveTerms(benchmark::State& state) {
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index.get(), numeric_index.get(),
language_segmenter.get(), normalizer.get(),
- document_store.get(), schema_store.get()));
+ document_store.get(), schema_store.get(), &clock));
const std::string query_string = absl_ports::StrCat(
input_string_a, " ", input_string_b, " ", input_string_c, " ",
@@ -452,7 +452,7 @@ void BM_QueryDiacriticTerm(benchmark::State& state) {
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index.get(), numeric_index.get(),
language_segmenter.get(), normalizer.get(),
- document_store.get(), schema_store.get()));
+ document_store.get(), schema_store.get(), &clock));
SearchSpecProto search_spec;
search_spec.set_query(input_string);
@@ -584,7 +584,7 @@ void BM_QueryHiragana(benchmark::State& state) {
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index.get(), numeric_index.get(),
language_segmenter.get(), normalizer.get(),
- document_store.get(), schema_store.get()));
+ document_store.get(), schema_store.get(), &clock));
SearchSpecProto search_spec;
search_spec.set_query(input_string);
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index 53e3035..43c9629 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -33,6 +33,7 @@
#include "icing/jni/jni-cache.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/portable/platform.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
@@ -61,6 +62,7 @@ namespace lib {
namespace {
using ::testing::ElementsAre;
+using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::SizeIs;
using ::testing::UnorderedElementsAre;
@@ -138,7 +140,8 @@ class QueryProcessorTest
query_processor_,
QueryProcessor::Create(index_.get(), numeric_index_.get(),
language_segmenter_.get(), normalizer_.get(),
- document_store_.get(), schema_store_.get()));
+ document_store_.get(), schema_store_.get(),
+ &fake_clock_));
}
libtextclassifier3::Status AddTokenToIndex(
@@ -188,35 +191,41 @@ class QueryProcessorTest
};
TEST_P(QueryProcessorTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(
- QueryProcessor::Create(/*index=*/nullptr, numeric_index_.get(),
- language_segmenter_.get(), normalizer_.get(),
- document_store_.get(), schema_store_.get()),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(
- QueryProcessor::Create(index_.get(), /*numeric_index_=*/nullptr,
- language_segmenter_.get(), normalizer_.get(),
- document_store_.get(), schema_store_.get()),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(
- QueryProcessor::Create(index_.get(), numeric_index_.get(),
- /*language_segmenter=*/nullptr, normalizer_.get(),
- document_store_.get(), schema_store_.get()),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(
- QueryProcessor::Create(
- index_.get(), numeric_index_.get(), language_segmenter_.get(),
- /*normalizer=*/nullptr, document_store_.get(), schema_store_.get()),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(QueryProcessor::Create(/*index=*/nullptr, numeric_index_.get(),
+ language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(QueryProcessor::Create(index_.get(), /*numeric_index_=*/nullptr,
+ language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(QueryProcessor::Create(index_.get(), numeric_index_.get(),
+ /*language_segmenter=*/nullptr,
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(QueryProcessor::Create(
+ index_.get(), numeric_index_.get(), language_segmenter_.get(),
+ /*normalizer=*/nullptr, document_store_.get(),
+ schema_store_.get(), &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(
QueryProcessor::Create(index_.get(), numeric_index_.get(),
language_segmenter_.get(), normalizer_.get(),
- /*document_store=*/nullptr, schema_store_.get()),
+ /*document_store=*/nullptr, schema_store_.get(),
+ &fake_clock_),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(QueryProcessor::Create(index_.get(), numeric_index_.get(),
language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- /*schema_store=*/nullptr),
+ /*schema_store=*/nullptr, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(QueryProcessor::Create(index_.get(), numeric_index_.get(),
+ language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), /*clock=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
@@ -2948,7 +2957,8 @@ TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
std::unique_ptr<QueryProcessor> local_query_processor,
QueryProcessor::Create(index_.get(), numeric_index_.get(),
language_segmenter_.get(), normalizer_.get(),
- document_store_.get(), schema_store_.get()));
+ document_store_.get(), schema_store_.get(),
+ &fake_clock_));
SearchSpecProto search_spec;
search_spec.set_query("hello");
@@ -3010,7 +3020,8 @@ TEST_P(QueryProcessorTest, DocumentPastTtlFilteredOut) {
std::unique_ptr<QueryProcessor> local_query_processor,
QueryProcessor::Create(index_.get(), numeric_index_.get(),
language_segmenter_.get(), normalizer_.get(),
- document_store_.get(), schema_store_.get()));
+ document_store_.get(), schema_store_.get(),
+ &fake_clock_));
SearchSpecProto search_spec;
search_spec.set_query("hello");
@@ -3320,6 +3331,65 @@ TEST_P(QueryProcessorTest, GroupingInSectionRestriction) {
std::vector<SectionId>{prop1_section_id})));
}
+TEST_P(QueryProcessorTest, ParseAdvancedQueryShouldSetSearchStats) {
+ if (GetParam() !=
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
+ GTEST_SKIP();
+ }
+
+ // Create the schema and document store
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ // Populate the index
+ SectionId section_id = 0;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ EXPECT_THAT(
+ AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
+ IsOk());
+ EXPECT_THAT(
+ AddTokenToIndex(document_id, section_id, term_match_type, "world"),
+ IsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("hello world");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
+
+ static constexpr int64_t kSearchStatsLatencyMs = 10;
+ fake_clock_.SetTimerElapsedMilliseconds(kSearchStatsLatencyMs);
+
+ QueryStatsProto::SearchStats search_stats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds(), &search_stats));
+
+ ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+ EXPECT_THAT(search_stats.query_processor_lexer_extract_token_latency_ms(),
+ Eq(kSearchStatsLatencyMs));
+ EXPECT_THAT(search_stats.query_processor_parser_consume_query_latency_ms(),
+ Eq(kSearchStatsLatencyMs));
+ EXPECT_THAT(search_stats.query_processor_query_visitor_latency_ms(),
+ Eq(kSearchStatsLatencyMs));
+}
+
INSTANTIATE_TEST_SUITE_P(
QueryProcessorTest, QueryProcessorTest,
testing::Values(
diff --git a/icing/query/suggestion-processor.cc b/icing/query/suggestion-processor.cc
index eb86e3b..2d73c6b 100644
--- a/icing/query/suggestion-processor.cc
+++ b/icing/query/suggestion-processor.cc
@@ -14,14 +14,34 @@
#include "icing/query/suggestion-processor.h"
-#include "icing/proto/schema.pb.h"
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/index.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/term-metadata.h"
#include "icing/proto/search.pb.h"
#include "icing/query/query-processor.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-id.h"
#include "icing/store/suggestion-result-checker-impl.h"
-#include "icing/tokenization/tokenizer-factory.h"
-#include "icing/tokenization/tokenizer.h"
+#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
@@ -32,17 +52,19 @@ SuggestionProcessor::Create(Index* index,
const LanguageSegmenter* language_segmenter,
const Normalizer* normalizer,
const DocumentStore* document_store,
- const SchemaStore* schema_store) {
+ const SchemaStore* schema_store,
+ const Clock* clock) {
ICING_RETURN_ERROR_IF_NULL(index);
ICING_RETURN_ERROR_IF_NULL(numeric_index);
ICING_RETURN_ERROR_IF_NULL(language_segmenter);
ICING_RETURN_ERROR_IF_NULL(normalizer);
ICING_RETURN_ERROR_IF_NULL(document_store);
ICING_RETURN_ERROR_IF_NULL(schema_store);
+ ICING_RETURN_ERROR_IF_NULL(clock);
return std::unique_ptr<SuggestionProcessor>(
new SuggestionProcessor(index, numeric_index, language_segmenter,
- normalizer, document_store, schema_store));
+ normalizer, document_store, schema_store, clock));
}
libtextclassifier3::StatusOr<
@@ -225,7 +247,8 @@ SuggestionProcessor::QuerySuggestions(
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(&index_, &numeric_index_, &language_segmenter_,
- &normalizer_, &document_store_, &schema_store_));
+ &normalizer_, &document_store_, &schema_store_,
+ &clock_));
SearchSpecProto search_spec;
search_spec.set_query(suggestion_spec.prefix());
@@ -299,13 +322,15 @@ SuggestionProcessor::QuerySuggestions(
SuggestionProcessor::SuggestionProcessor(
Index* index, const NumericIndex<int64_t>* numeric_index,
const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
- const DocumentStore* document_store, const SchemaStore* schema_store)
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const Clock* clock)
: index_(*index),
numeric_index_(*numeric_index),
language_segmenter_(*language_segmenter),
normalizer_(*normalizer),
document_store_(*document_store),
- schema_store_(*schema_store) {}
+ schema_store_(*schema_store),
+ clock_(*clock) {}
} // namespace lib
} // namespace icing
diff --git a/icing/query/suggestion-processor.h b/icing/query/suggestion-processor.h
index e100031..fe1437c 100644
--- a/icing/query/suggestion-processor.h
+++ b/icing/query/suggestion-processor.h
@@ -15,6 +15,10 @@
#ifndef ICING_QUERY_SUGGESTION_PROCESSOR_H_
#define ICING_QUERY_SUGGESTION_PROCESSOR_H_
+#include <cstdint>
+#include <memory>
+#include <vector>
+
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/index/index.h"
#include "icing/index/numeric/numeric-index.h"
@@ -23,6 +27,7 @@
#include "icing/store/document-store.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -43,7 +48,7 @@ class SuggestionProcessor {
Create(Index* index, const NumericIndex<int64_t>* numeric_index,
const LanguageSegmenter* language_segmenter,
const Normalizer* normalizer, const DocumentStore* document_store,
- const SchemaStore* schema_store);
+ const SchemaStore* schema_store, const Clock* clock);
// Query suggestions based on the given SuggestionSpecProto.
//
@@ -60,16 +65,18 @@ class SuggestionProcessor {
const LanguageSegmenter* language_segmenter,
const Normalizer* normalizer,
const DocumentStore* document_store,
- const SchemaStore* schema_store);
+ const SchemaStore* schema_store,
+ const Clock* clock);
// Not const because we could modify/sort the TermMetaData buffer in the lite
// index.
- Index& index_;
- const NumericIndex<int64_t>& numeric_index_;
- const LanguageSegmenter& language_segmenter_;
- const Normalizer& normalizer_;
- const DocumentStore& document_store_;
- const SchemaStore& schema_store_;
+ Index& index_; // Does not own.
+ const NumericIndex<int64_t>& numeric_index_; // Does not own.
+ const LanguageSegmenter& language_segmenter_; // Does not own.
+ const Normalizer& normalizer_; // Does not own.
+ const DocumentStore& document_store_; // Does not own.
+ const SchemaStore& schema_store_; // Does not own.
+ const Clock& clock_; // Does not own.
};
} // namespace lib
diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc
index 9f9094d..4c5e4ac 100644
--- a/icing/query/suggestion-processor_test.cc
+++ b/icing/query/suggestion-processor_test.cc
@@ -117,9 +117,10 @@ class SuggestionProcessorTest : public Test {
ICING_ASSERT_OK_AND_ASSIGN(
suggestion_processor_,
- SuggestionProcessor::Create(
- index_.get(), numeric_index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(), schema_store_.get()));
+ SuggestionProcessor::Create(index_.get(), numeric_index_.get(),
+ language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_));
}
libtextclassifier3::Status AddTokenToIndex(
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index e17e388..6830787 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -486,7 +486,7 @@ libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles(
ICING_RETURN_IF_ERROR(schema_file_->Write(std::move(base_schema_ptr)));
// LINT.IfChange(min_overlay_version_compatibility)
- // Although the current version is 3, the schema is compatible with
+ // Although the current version is 4, the schema is compatible with
// version 1, so min_overlay_version_compatibility should be 1.
int32_t min_overlay_version_compatibility = version_util::kVersionOne;
// LINT.ThenChange(//depot/google3/icing/file/version-util.h:kVersion)
diff --git a/icing/text_classifier/lib3/utils/base/logging.h b/icing/text_classifier/lib3/utils/base/logging.h
index 92d775e..39e09eb 100644
--- a/icing/text_classifier/lib3/utils/base/logging.h
+++ b/icing/text_classifier/lib3/utils/base/logging.h
@@ -16,9 +16,9 @@
#define ICING_TEXT_CLASSIFIER_LIB3_UTILS_BASE_LOGGING_H_
#include <cassert>
+#include <cstdint>
#include <string>
-#include "icing/text_classifier/lib3/utils/base/integral_types.h"
#include "icing/text_classifier/lib3/utils/base/logging_levels.h"
#include "icing/text_classifier/lib3/utils/base/port.h"
@@ -45,7 +45,8 @@ inline LoggingStringStream& operator<<(LoggingStringStream& stream,
template <typename T>
inline LoggingStringStream& operator<<(LoggingStringStream& stream,
T* const entry) {
- stream.message.append(std::to_string(reinterpret_cast<const uint64>(entry)));
+ stream.message.append(
+ std::to_string(reinterpret_cast<const uint64_t>(entry)));
return stream;
}
diff --git a/icing/text_classifier/lib3/utils/java/jni-helper.cc b/icing/text_classifier/lib3/utils/java/jni-helper.cc
index 60a9dfb..cb0b899 100644
--- a/icing/text_classifier/lib3/utils/java/jni-helper.cc
+++ b/icing/text_classifier/lib3/utils/java/jni-helper.cc
@@ -14,6 +14,8 @@
#include "icing/text_classifier/lib3/utils/java/jni-helper.h"
+#include <cstdint>
+
#include "icing/text_classifier/lib3/utils/base/status_macros.h"
namespace libtextclassifier3 {
@@ -121,8 +123,8 @@ StatusOr<bool> JniHelper::CallBooleanMethod(JNIEnv* env, jobject object,
return result;
}
-StatusOr<int32> JniHelper::CallIntMethod(JNIEnv* env, jobject object,
- jmethodID method_id, ...) {
+StatusOr<int32_t> JniHelper::CallIntMethod(JNIEnv* env, jobject object,
+ jmethodID method_id, ...) {
va_list args;
va_start(args, method_id);
jint result = env->CallIntMethodV(object, method_id, args);
@@ -132,8 +134,8 @@ StatusOr<int32> JniHelper::CallIntMethod(JNIEnv* env, jobject object,
return result;
}
-StatusOr<int64> JniHelper::CallLongMethod(JNIEnv* env, jobject object,
- jmethodID method_id, ...) {
+StatusOr<int64_t> JniHelper::CallLongMethod(JNIEnv* env, jobject object,
+ jmethodID method_id, ...) {
va_list args;
va_start(args, method_id);
jlong result = env->CallLongMethodV(object, method_id, args);
diff --git a/icing/text_classifier/lib3/utils/java/jni-helper.h b/icing/text_classifier/lib3/utils/java/jni-helper.h
index 4e548ec..8b57c11 100644
--- a/icing/text_classifier/lib3/utils/java/jni-helper.h
+++ b/icing/text_classifier/lib3/utils/java/jni-helper.h
@@ -20,6 +20,7 @@
#include <jni.h>
+#include <cstdint>
#include <string>
#include "icing/text_classifier/lib3/utils/base/status.h"
@@ -140,10 +141,10 @@ class JniHelper {
...);
static StatusOr<bool> CallBooleanMethod(JNIEnv* env, jobject object,
jmethodID method_id, ...);
- static StatusOr<int32> CallIntMethod(JNIEnv* env, jobject object,
- jmethodID method_id, ...);
- static StatusOr<int64> CallLongMethod(JNIEnv* env, jobject object,
- jmethodID method_id, ...);
+ static StatusOr<int32_t> CallIntMethod(JNIEnv* env, jobject object,
+ jmethodID method_id, ...);
+ static StatusOr<int64_t> CallLongMethod(JNIEnv* env, jobject object,
+ jmethodID method_id, ...);
static StatusOr<float> CallFloatMethod(JNIEnv* env, jobject object,
jmethodID method_id, ...);
static StatusOr<double> CallDoubleMethod(JNIEnv* env, jobject object,
diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java
index 47b94a5..a8a571e 100644
--- a/java/src/com/google/android/icing/IcingSearchEngine.java
+++ b/java/src/com/google/android/icing/IcingSearchEngine.java
@@ -77,12 +77,6 @@ public class IcingSearchEngine implements IcingSearchEngineInterface {
icingSearchEngineImpl.close();
}
- @Override
- protected void finalize() throws Throwable {
- icingSearchEngineImpl.close();
- super.finalize();
- }
-
@NonNull
@Override
public InitializeResultProto initialize() {
diff --git a/java/src/com/google/android/icing/IcingSearchEngineImpl.java b/java/src/com/google/android/icing/IcingSearchEngineImpl.java
index 8e79a88..7994162 100644
--- a/java/src/com/google/android/icing/IcingSearchEngineImpl.java
+++ b/java/src/com/google/android/icing/IcingSearchEngineImpl.java
@@ -71,12 +71,6 @@ public class IcingSearchEngineImpl implements Closeable {
closed = true;
}
- @Override
- protected void finalize() throws Throwable {
- close();
- super.finalize();
- }
-
@Nullable
public byte[] initialize() {
throwIfClosed();
diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto
index 9dd9e88..7c58d2c 100644
--- a/proto/icing/proto/initialize.proto
+++ b/proto/icing/proto/initialize.proto
@@ -23,6 +23,56 @@ option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
+// Next tag: 7
+message IcingSearchEngineFeatureInfoProto {
+ // REQUIRED: Enum representing an IcingLib feature flagged using
+ // IcingSearchEngineOptions
+ optional FlaggedFeatureType feature_type = 1;
+
+ enum FlaggedFeatureType {
+ // This value should never purposely be used. This is used for backwards
+ // compatibility reasons.
+ UNKNOWN = 0;
+
+ // Feature for flag
+ // IcingSearchEngineOptions::build_property_existence_metadata_hits.
+ //
+ // This feature covers the kHasPropertyFunctionFeature advanced query
+ // feature, and related metadata hits indexing used for property existence
+ // check.
+ FEATURE_HAS_PROPERTY_OPERATOR = 1;
+ }
+
+ // Whether the feature requires the document store to be rebuilt.
+ // The default value is false.
+ optional bool needs_document_store_rebuild = 2;
+
+ // Whether the feature requires the schema store to be rebuilt.
+ // The default value is false.
+ optional bool needs_schema_store_rebuild = 3;
+
+ // Whether the feature requires the term index to be rebuilt.
+ // The default value is false.
+ optional bool needs_term_index_rebuild = 4;
+
+ // Whether the feature requires the integer index to be rebuilt.
+ // The default value is false.
+ optional bool needs_integer_index_rebuild = 5;
+
+ // Whether the feature requires the qualified id join index to be rebuilt.
+ // The default value is false.
+ optional bool needs_qualified_id_join_index_rebuild = 6;
+}
+
+// Next tag: 4
+message IcingSearchEngineVersionProto {
+ // version and max_version are from the original version file.
+ optional int32 version = 1;
+ optional int32 max_version = 2;
+ // Features that are enabled in an icing version at initialization.
+ repeated IcingSearchEngineFeatureInfoProto enabled_features = 3;
+}
+
// Next tag: 16
message IcingSearchEngineOptions {
// Directory to persist files for Icing. Required.
@@ -131,9 +181,6 @@ message IcingSearchEngineOptions {
// Whether to build the metadata hits used for property existence check, which
// is required to support the hasProperty function in advanced query.
- //
- // TODO(b/309826655): Implement the feature flag derived files rebuild
- // mechanism to handle index rebuild, instead of using index's magic value.
optional bool build_property_existence_metadata_hits = 15;
reserved 2;
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
index fcedeed..4854521 100644
--- a/proto/icing/proto/logging.proto
+++ b/proto/icing/proto/logging.proto
@@ -55,6 +55,10 @@ message InitializeStatsProto {
// Any dependencies have changed.
DEPENDENCIES_CHANGED = 7;
+
+ // Change detected in Icing's feature flags since last initialization that
+ // requires recovery.
+ FEATURE_FLAG_CHANGED = 8;
}
// Possible recovery causes for document store:
@@ -170,7 +174,7 @@ message PutDocumentStatsProto {
// Stats of the top-level function IcingSearchEngine::Search() and
// IcingSearchEngine::GetNextPage().
-// Next tag: 26
+// Next tag: 28
message QueryStatsProto {
// TODO(b/305098009): deprecate. Use parent_search_stats instead.
// The UTF-8 length of the query string
@@ -252,7 +256,7 @@ message QueryStatsProto {
optional bool is_join_query = 23;
// Stats of the search. Only valid for first page.
- // Next tag: 13
+ // Next tag: 16
message SearchStats {
// The UTF-8 length of the query string
optional int32 query_length = 1;
@@ -290,6 +294,15 @@ message QueryStatsProto {
// Number of hits fetched by integer index before applying any filters.
optional int32 num_fetched_hits_integer_index = 12;
+
+ // Time used in Lexer to extract lexer tokens from the query.
+ optional int32 query_processor_lexer_extract_token_latency_ms = 13;
+
+ // Time used in Parser to consume lexer tokens extracted from the query.
+ optional int32 query_processor_parser_consume_query_latency_ms = 14;
+
+ // Time used in QueryVisitor to visit and build (nested) DocHitInfoIterator.
+ optional int32 query_processor_query_visitor_latency_ms = 15;
}
// Search stats for parent. Only valid for first page.
@@ -298,6 +311,12 @@ message QueryStatsProto {
// Search stats for child.
optional SearchStats child_search_stats = 25;
+ // Byte size of the lite index hit buffer.
+ optional int64 lite_index_hit_buffer_byte_size = 26;
+
+ // Byte size of the unsorted tail of the lite index hit buffer.
+ optional int64 lite_index_hit_buffer_unsorted_byte_size = 27;
+
reserved 9;
}
diff --git a/proto/icing/proto/storage.proto b/proto/icing/proto/storage.proto
index 39dab6b..e0323a1 100644
--- a/proto/icing/proto/storage.proto
+++ b/proto/icing/proto/storage.proto
@@ -22,6 +22,8 @@ option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
+// TODO(b/305098009): fix byte size vs size naming issue.
+
// Next tag: 10
message NamespaceStorageInfoProto {
// Name of the namespace
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index dd08fd1..a36a007 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=587883838)
+set(synced_AOSP_CL_number=596096845)