aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYen-Chieh Sung <sungyc@google.com>2023-07-06 18:01:13 -0700
committerYen-Chieh Sung <sungyc@google.com>2023-07-06 19:39:11 -0700
commit4ac48569941d4f48b1a9bffdaf4abb2a3e8e9269 (patch)
tree076d8d961026f2800a6ca41106aa11d54527a29b
parentf1f564329ad560f580c03f9e928057ad4e4fadc9 (diff)
downloadicing-4ac48569941d4f48b1a9bffdaf4abb2a3e8e9269.tar.gz
Update Icing from upstream.
Descriptions: ======================================================================== [Icing] Extend document store uri mapper ======================================================================== Implement dirty flag in PersistentStorage ======================================================================== [Icing][version 2][1/x] Implement ShouldRebuildDerivedFiles ======================================================================== [Icing][version 2][2/x] Bump kVersion to 2 ======================================================================== [NumericSearch][optimization] Change numeric index bucket splitting threshold ======================================================================== [NumericSearch][optimization] Flag guard integer index bucket splitting threshold ======================================================================== Bug: 193919210 Bug: 280094535 Bug: 288969109 Bug: 259743562 NO_IFTTT="Path is only valid in G3." Change-Id: Ie1e6f5969549f6c945ea32d03c724850ff5600a9
-rw-r--r--icing/file/persistent-hash-map.cc47
-rw-r--r--icing/file/persistent-hash-map.h27
-rw-r--r--icing/file/persistent-storage.h65
-rw-r--r--icing/file/posting_list/flash-index-storage.cc3
-rw-r--r--icing/file/version-util.cc38
-rw-r--r--icing/file/version-util.h21
-rw-r--r--icing/file/version-util_test.cc88
-rw-r--r--icing/icing-search-engine.cc30
-rw-r--r--icing/icing-search-engine_benchmark.cc53
-rw-r--r--icing/icing-search-engine_initialization_test.cc144
-rw-r--r--icing/index/index-processor_benchmark.cc4
-rw-r--r--icing/index/index-processor_test.cc22
-rw-r--r--icing/index/integer-section-indexing-handler_test.cc3
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter_test.cc3
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc15
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc15
-rw-r--r--icing/index/numeric/dummy-numeric-index.h9
-rw-r--r--icing/index/numeric/integer-index-bucket-util.h2
-rw-r--r--icing/index/numeric/integer-index-storage.cc205
-rw-r--r--icing/index/numeric/integer-index-storage.h104
-rw-r--r--icing/index/numeric/integer-index-storage_benchmark.cc42
-rw-r--r--icing/index/numeric/integer-index-storage_test.cc571
-rw-r--r--icing/index/numeric/integer-index.cc106
-rw-r--r--icing/index/numeric/integer-index.h62
-rw-r--r--icing/index/numeric/integer-index_test.cc197
-rw-r--r--icing/index/numeric/numeric-index.h14
-rw-r--r--icing/index/numeric/posting-list-integer-index-accessor.h10
-rw-r--r--icing/join/join-processor_test.cc15
-rw-r--r--icing/join/qualified-id-join-index.cc32
-rw-r--r--icing/join/qualified-id-join-index.h29
-rw-r--r--icing/query/advanced_query_parser/query-visitor_test.cc15
-rw-r--r--icing/query/query-processor_benchmark.cc3
-rw-r--r--icing/query/query-processor_test.cc3
-rw-r--r--icing/query/suggestion-processor_test.cc15
-rw-r--r--icing/result/result-retriever-v2_group-result-limiter_test.cc15
-rw-r--r--icing/result/result-retriever-v2_projection_test.cc15
-rw-r--r--icing/result/result-retriever-v2_snippet_test.cc15
-rw-r--r--icing/result/result-retriever-v2_test.cc3
-rw-r--r--icing/result/result-state-manager_test.cc15
-rw-r--r--icing/result/result-state-manager_thread-safety_test.cc15
-rw-r--r--icing/result/result-state-v2_test.cc15
-rw-r--r--icing/schema/schema-store.cc8
-rw-r--r--icing/schema/schema-store_test.cc21
-rw-r--r--icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc14
-rw-r--r--icing/scoring/advanced_scoring/advanced-scorer_test.cc15
-rw-r--r--icing/scoring/score-and-rank_benchmark.cc3
-rw-r--r--icing/scoring/scorer_test.cc15
-rw-r--r--icing/scoring/scoring-processor_test.cc15
-rw-r--r--icing/store/document-store.cc97
-rw-r--r--icing/store/document-store.h24
-rw-r--r--icing/store/document-store_benchmark.cc3
-rw-r--r--icing/store/document-store_test.cc440
-rw-r--r--proto/icing/proto/initialize.proto5
-rw-r--r--synced_AOSP_CL_number.txt2
54 files changed, 2027 insertions, 740 deletions
diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc
index 729b09a..558c242 100644
--- a/icing/file/persistent-hash-map.cc
+++ b/icing/file/persistent-hash-map.cc
@@ -27,6 +27,7 @@
#include "icing/absl_ports/str_cat.h"
#include "icing/file/file-backed-vector.h"
#include "icing/file/memory-mapped-file.h"
+#include "icing/file/persistent-storage.h"
#include "icing/util/crc32.h"
#include "icing/util/status-macros.h"
@@ -167,6 +168,8 @@ PersistentHashMap::~PersistentHashMap() {
libtextclassifier3::Status PersistentHashMap::Put(std::string_view key,
const void* value) {
+ SetDirty();
+
ICING_RETURN_IF_ERROR(ValidateKey(key));
ICING_ASSIGN_OR_RETURN(
int32_t bucket_idx,
@@ -207,6 +210,7 @@ libtextclassifier3::Status PersistentHashMap::GetOrPut(std::string_view key,
FindEntryIndexByKey(bucket_idx, key));
if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
// If not found, then insert new key value pair.
+ SetDirty();
return Insert(bucket_idx, key, next_value);
}
@@ -232,6 +236,8 @@ libtextclassifier3::Status PersistentHashMap::Get(std::string_view key,
}
libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) {
+ SetDirty();
+
ICING_RETURN_IF_ERROR(ValidateKey(key));
ICING_ASSIGN_OR_RETURN(
int32_t bucket_idx,
@@ -514,6 +520,7 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
<< " to "
<< persistent_hash_map->options_.max_load_factor_percent;
+ persistent_hash_map->SetInfoDirty();
persistent_hash_map->info().max_load_factor_percent =
persistent_hash_map->options_.max_load_factor_percent;
ICING_RETURN_IF_ERROR(
@@ -525,26 +532,50 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
return persistent_hash_map;
}
-libtextclassifier3::Status PersistentHashMap::PersistStoragesToDisk() {
+libtextclassifier3::Status PersistentHashMap::PersistStoragesToDisk(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
ICING_RETURN_IF_ERROR(bucket_storage_->PersistToDisk());
ICING_RETURN_IF_ERROR(entry_storage_->PersistToDisk());
ICING_RETURN_IF_ERROR(kv_storage_->PersistToDisk());
+ is_storage_dirty_ = false;
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status PersistentHashMap::PersistMetadataToDisk() {
+libtextclassifier3::Status PersistentHashMap::PersistMetadataToDisk(
+ bool force) {
+ // We can skip persisting metadata to disk only if both info and storage are
+ // clean.
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
// Changes should have been applied to the underlying file when using
// MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
// extra safety step to ensure they are written out.
- return metadata_mmapped_file_->PersistToDisk();
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file_->PersistToDisk());
+ is_info_dirty_ = false;
+ return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<Crc32> PersistentHashMap::ComputeInfoChecksum() {
+libtextclassifier3::StatusOr<Crc32> PersistentHashMap::ComputeInfoChecksum(
+ bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
return info().ComputeChecksum();
}
-libtextclassifier3::StatusOr<Crc32>
-PersistentHashMap::ComputeStoragesChecksum() {
+libtextclassifier3::StatusOr<Crc32> PersistentHashMap::ComputeStoragesChecksum(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
// Compute crcs
ICING_ASSIGN_OR_RETURN(Crc32 bucket_storage_crc,
bucket_storage_->ComputeChecksum());
@@ -602,6 +633,8 @@ libtextclassifier3::Status PersistentHashMap::CopyEntryValue(
libtextclassifier3::Status PersistentHashMap::Insert(int32_t bucket_idx,
std::string_view key,
const void* value) {
+ SetDirty();
+
// If entry_storage_->num_elements() + 1 exceeds options_.max_num_entries,
// then return error.
// We compute max_file_size of 3 storages by options_.max_num_entries. Since
@@ -655,6 +688,8 @@ libtextclassifier3::Status PersistentHashMap::RehashIfNecessary(
return libtextclassifier3::Status::OK;
}
+ SetDirty();
+
// Resize and reset buckets.
ICING_RETURN_IF_ERROR(
bucket_storage_->Set(0, new_num_bucket, Bucket(Entry::kInvalidIndex)));
diff --git a/icing/file/persistent-hash-map.h b/icing/file/persistent-hash-map.h
index 845b22a..5f7999d 100644
--- a/icing/file/persistent-hash-map.h
+++ b/icing/file/persistent-hash-map.h
@@ -394,7 +394,9 @@ class PersistentHashMap : public PersistentStorage {
std::move(metadata_mmapped_file))),
bucket_storage_(std::move(bucket_storage)),
entry_storage_(std::move(entry_storage)),
- kv_storage_(std::move(kv_storage)) {}
+ kv_storage_(std::move(kv_storage)),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
static libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
@@ -409,20 +411,20 @@ class PersistentHashMap : public PersistentStorage {
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistStoragesToDisk() override;
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
// Flushes contents of metadata file.
//
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistMetadataToDisk() override;
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
// Computes and returns Info checksum.
//
// Returns:
// - Crc of the Info on success
- libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override;
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
// Computes and returns all storages checksum. Checksums of bucket_storage_,
// entry_storage_ and kv_storage_ will be combined together by XOR.
@@ -430,7 +432,8 @@ class PersistentHashMap : public PersistentStorage {
// Returns:
// - Crc of all storages on success
// - INTERNAL_ERROR if any data inconsistency
- libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() override;
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
// Find the index of the target entry (that contains the key) from a bucket
// (specified by bucket index). Also return the previous entry index, since
@@ -496,6 +499,17 @@ class PersistentHashMap : public PersistentStorage {
kInfoMetadataFileOffset);
}
+ void SetInfoDirty() { is_info_dirty_ = true; }
+ // When storage is dirty, we have to set info dirty as well. So just expose
+ // SetDirty to set both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
Options options_;
std::unique_ptr<MemoryMappedFile> metadata_mmapped_file_;
@@ -504,6 +518,9 @@ class PersistentHashMap : public PersistentStorage {
std::unique_ptr<FileBackedVector<Bucket>> bucket_storage_;
std::unique_ptr<FileBackedVector<Entry>> entry_storage_;
std::unique_ptr<FileBackedVector<char>> kv_storage_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
};
} // namespace lib
diff --git a/icing/file/persistent-storage.h b/icing/file/persistent-storage.h
index 727cae9..9cb5e4d 100644
--- a/icing/file/persistent-storage.h
+++ b/icing/file/persistent-storage.h
@@ -148,8 +148,9 @@ class PersistentStorage {
return libtextclassifier3::Status::OK;
}
- ICING_RETURN_IF_ERROR(UpdateChecksumsInternal());
- ICING_RETURN_IF_ERROR(PersistMetadataToDisk());
+ ICING_RETURN_IF_ERROR(UpdateChecksumsInternal(/*force=*/true));
+ ICING_RETURN_IF_ERROR(PersistStoragesToDisk(/*force=*/true));
+ ICING_RETURN_IF_ERROR(PersistMetadataToDisk(/*force=*/true));
is_initialized_ = true;
return libtextclassifier3::Status::OK;
@@ -184,38 +185,52 @@ class PersistentStorage {
// 2) Updates all checksums by new data.
// 3) Flushes metadata.
//
+ // Force flag will be passed down to PersistMetadataToDisk,
+ // PersistStoragesToDisk, ComputeInfoChecksum, ComputeStoragesChecksum.
+ // - If force == true, then performs actual persisting operations/recomputes
+ // the checksum.
+ // - Otherwise, the derived class can decide itself whether skipping
+ // persisting operations/doing lazy checksum recomputing if the storage is
+ // not dirty.
+ //
// Returns:
// - OK on success
// - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized
// - Any errors from PersistStoragesToDisk, UpdateChecksums,
// PersistMetadataToDisk, depending on actual implementation
- libtextclassifier3::Status PersistToDisk() {
+ libtextclassifier3::Status PersistToDisk(bool force = false) {
if (!is_initialized_) {
return absl_ports::FailedPreconditionError(absl_ports::StrCat(
"PersistentStorage ", working_path_, " not initialized"));
}
- ICING_RETURN_IF_ERROR(PersistStoragesToDisk());
- ICING_RETURN_IF_ERROR(UpdateChecksums());
- ICING_RETURN_IF_ERROR(PersistMetadataToDisk());
+ ICING_RETURN_IF_ERROR(UpdateChecksumsInternal(force));
+ ICING_RETURN_IF_ERROR(PersistStoragesToDisk(force));
+ ICING_RETURN_IF_ERROR(PersistMetadataToDisk(force));
return libtextclassifier3::Status::OK;
}
// Updates checksums of all components and returns the overall crc (all_crc)
// of the persistent storage.
//
+ // Force flag will be passed down ComputeInfoChecksum,
+ // ComputeStoragesChecksum.
+ // - If force == true, then recomputes the checksum.
+ // - Otherwise, the derived class can decide itself whether doing lazy
+ // checksum recomputing if the storage is not dirty.
+ //
// Returns:
// - Overall crc of the persistent storage on success
// - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized
// - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
// on actual implementation
- libtextclassifier3::StatusOr<Crc32> UpdateChecksums() {
+ libtextclassifier3::StatusOr<Crc32> UpdateChecksums(bool force = false) {
if (!is_initialized_) {
return absl_ports::FailedPreconditionError(absl_ports::StrCat(
"PersistentStorage ", working_path_, " not initialized"));
}
- return UpdateChecksumsInternal();
+ return UpdateChecksumsInternal(force);
}
protected:
@@ -234,33 +249,41 @@ class PersistentStorage {
// Returns:
// - OK on success
// - Any other errors, depending on actual implementation
- virtual libtextclassifier3::Status PersistMetadataToDisk() = 0;
+ virtual libtextclassifier3::Status PersistMetadataToDisk(bool force) = 0;
// Flushes contents of all storages to underlying files.
//
// Returns:
// - OK on success
// - Any other errors, depending on actual implementation
- virtual libtextclassifier3::Status PersistStoragesToDisk() = 0;
+ virtual libtextclassifier3::Status PersistStoragesToDisk(bool force) = 0;
// Computes and returns Info checksum.
+ // - If force = true, then recompute the entire checksum.
+ // - Otherwise, the derived class can decide itself whether doing lazy
+ // checksum computing if the storage is not dirty.
//
// This function will be mainly called by UpdateChecksums.
//
// Returns:
// - Crc of the Info on success
// - Any other errors, depending on actual implementation
- virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() = 0;
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(
+ bool force) = 0;
// Computes and returns all storages checksum. If there are multiple storages,
// usually we XOR their checksums together to a single checksum.
+ // - If force = true, then recompute the entire checksum.
+ // - Otherwise, the derived class can decide itself whether doing lazy
+ // checksum computing if the storage is not dirty.
//
// This function will be mainly called by UpdateChecksums.
//
// Returns:
// - Crc of all storages on success
// - Any other errors from depending on actual implementation
- virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() = 0;
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) = 0;
// Returns the Crcs instance reference. The derived class can either own a
// concrete Crcs instance, or reinterpret_cast the memory-mapped region to
@@ -292,11 +315,18 @@ class PersistentStorage {
// - Overall crc of the persistent storage on success
// - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
// on actual implementation
- libtextclassifier3::StatusOr<Crc32> UpdateChecksumsInternal() {
+ libtextclassifier3::StatusOr<Crc32> UpdateChecksumsInternal(bool force) {
Crcs& crcs_ref = crcs();
// Compute and update storages + info checksums.
- ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum());
- ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum(force));
+ ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum(force));
+ if (crcs_ref.component_crcs.info_crc == info_crc.Get() &&
+ crcs_ref.component_crcs.storages_crc == storages_crc.Get()) {
+ // If info and storages crc haven't changed, then we don't have to update
+ // checksums.
+ return Crc32(crcs_ref.all_crc);
+ }
+
crcs_ref.component_crcs.info_crc = info_crc.Get();
crcs_ref.component_crcs.storages_crc = storages_crc.Get();
@@ -318,12 +348,13 @@ class PersistentStorage {
return absl_ports::FailedPreconditionError("Invalid all crc");
}
- ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum(/*force=*/true));
if (crcs_ref.component_crcs.info_crc != info_crc.Get()) {
return absl_ports::FailedPreconditionError("Invalid info crc");
}
- ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum());
+ ICING_ASSIGN_OR_RETURN(Crc32 storages_crc,
+ ComputeStoragesChecksum(/*force=*/true));
if (crcs_ref.component_crcs.storages_crc != storages_crc.Get()) {
return absl_ports::FailedPreconditionError("Invalid storages crc");
}
diff --git a/icing/file/posting_list/flash-index-storage.cc b/icing/file/posting_list/flash-index-storage.cc
index cd7ac12..21fea8a 100644
--- a/icing/file/posting_list/flash-index-storage.cc
+++ b/icing/file/posting_list/flash-index-storage.cc
@@ -487,6 +487,9 @@ libtextclassifier3::Status FlashIndexStorage::FreePostingList(
PostingListHolder&& holder) {
ICING_ASSIGN_OR_RETURN(IndexBlock block,
GetIndexBlock(holder.id.block_index()));
+ if (block.posting_list_bytes() == max_posting_list_bytes()) {
+ block.SetNextBlockIndex(kInvalidBlockIndex);
+ }
uint32_t posting_list_bytes = block.posting_list_bytes();
int best_block_info_index = FindBestIndexBlockInfo(posting_list_bytes);
diff --git a/icing/file/version-util.cc b/icing/file/version-util.cc
index f477072..7684262 100644
--- a/icing/file/version-util.cc
+++ b/icing/file/version-util.cc
@@ -102,6 +102,44 @@ StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
}
}
+bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
+ int32_t curr_version) {
+ StateChange state_change =
+ GetVersionStateChange(existing_version_info, curr_version);
+ switch (state_change) {
+ case StateChange::kCompatible:
+ return false;
+ case StateChange::kUndetermined:
+ [[fallthrough]];
+ case StateChange::kRollBack:
+ [[fallthrough]];
+ case StateChange::kRollForward:
+ [[fallthrough]];
+ case StateChange::kVersionZeroRollForward:
+ [[fallthrough]];
+ case StateChange::kVersionZeroUpgrade:
+ return true;
+ case StateChange::kUpgrade:
+ break;
+ }
+
+ bool should_rebuild = false;
+ int32_t existing_version = existing_version_info.version;
+ while (existing_version < curr_version) {
+ switch (existing_version) {
+ case 1: {
+ // version 1 -> version 2 upgrade, no need to rebuild
+ break;
+ }
+ default:
+ // This should not happen. Rebuild anyway if unsure.
+ should_rebuild |= true;
+ }
+ ++existing_version;
+ }
+ return should_rebuild;
+}
+
} // namespace version_util
} // namespace lib
diff --git a/icing/file/version-util.h b/icing/file/version-util.h
index 7fa7fbd..30c457d 100644
--- a/icing/file/version-util.h
+++ b/icing/file/version-util.h
@@ -28,9 +28,16 @@ namespace lib {
namespace version_util {
// - Version 0: Android T. Can be identified only by flash index magic.
-// - Version 1: mainline release 2023-06.
-inline static constexpr int32_t kVersion = 1;
+// - Version 1: Android U release 2023-06.
+// - Version 2: Android U 1st mainline release 2023-09. Schema is compatible
+// with version 1.
+// TODO(b/288969109): bump kVersion to 2 before finalizing the 1st Android U
+// mainline release.
+// LINT.IfChange(kVersion)
+inline static constexpr int32_t kVersion = 2;
+// LINT.ThenChange(//depot/google3/icing/schema/schema-store.cc:min_overlay_version_compatibility)
inline static constexpr int32_t kVersionOne = 1;
+inline static constexpr int32_t kVersionTwo = 2;
inline static constexpr int kVersionZeroFlashIndexMagic = 0x6dfba6ae;
@@ -89,6 +96,16 @@ libtextclassifier3::Status WriteVersion(const Filesystem& filesystem,
StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
int32_t curr_version = kVersion);
+// Helper method to determine whether Icing should rebuild all derived files.
+// Sometimes it is not required to rebuild derived files when
+// roll-forward/upgrading. This function "encodes" upgrade paths and checks if
+// the roll-forward/upgrading requires derived files to be rebuilt or not.
+//
+// REQUIRES: curr_version > 0. We implement version checking in version 1, so
+// the callers (except unit tests) will always use a version # greater than 0.
+bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
+ int32_t curr_version = kVersion);
+
} // namespace version_util
} // namespace lib
diff --git a/icing/file/version-util_test.cc b/icing/file/version-util_test.cc
index 78cdb7d..e94c351 100644
--- a/icing/file/version-util_test.cc
+++ b/icing/file/version-util_test.cc
@@ -32,6 +32,8 @@ namespace version_util {
namespace {
using ::testing::Eq;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
struct VersionUtilReadVersionTestParam {
std::optional<VersionInfo> existing_version_info;
@@ -339,6 +341,14 @@ INSTANTIATE_TEST_SUITE_P(
/*curr_version_in=*/2,
/*expected_state_change_in=*/StateChange::kRollForward),
+ // - version 1, max_version 2
+ // - Current version = 3
+ // - Result: roll forward
+ VersionUtilStateChangeTestParam(
+ /*existing_version_info_in=*/VersionInfo(1, 2),
+ /*curr_version_in=*/3,
+ /*expected_state_change_in=*/StateChange::kRollForward),
+
// - version 1, max_version 3
// - Current version = 2
// - Result: roll forward
@@ -379,6 +389,84 @@ INSTANTIATE_TEST_SUITE_P(
/*curr_version_in=*/2,
/*expected_state_change_in=*/StateChange::kRollBack)));
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesUndeterminedVersion) {
+ EXPECT_THAT(
+ ShouldRebuildDerivedFiles(VersionInfo(-1, -1), /*curr_version=*/1),
+ IsTrue());
+ EXPECT_THAT(
+ ShouldRebuildDerivedFiles(VersionInfo(-1, -1), /*curr_version=*/2),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesVersionZeroUpgrade) {
+ // 0 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 0), /*curr_version=*/1),
+ IsTrue());
+
+ // 0 -> 2
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 0), /*curr_version=*/2),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesVersionZeroRollForward) {
+ // (1 -> 0), 0 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 1), /*curr_version=*/1),
+ IsTrue());
+
+ // (1 -> 0), 0 -> 2
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 1), /*curr_version=*/2),
+ IsTrue());
+
+ // (2 -> 0), 0 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(0, 2), /*curr_version=*/1),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesRollBack) {
+ // 2 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(2, 2), /*curr_version=*/1),
+ IsTrue());
+
+ // 3 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(3, 3), /*curr_version=*/1),
+ IsTrue());
+
+ // (3 -> 2), 2 -> 1
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(2, 3), /*curr_version=*/1),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesRollForward) {
+ // (2 -> 1), 1 -> 2
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(1, 2), /*curr_version=*/2),
+ IsTrue());
+
+ // (2 -> 1), 1 -> 3
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(1, 2), /*curr_version=*/3),
+ IsTrue());
+
+ // (3 -> 1), 1 -> 2
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(1, 3), /*curr_version=*/2),
+ IsTrue());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesCompatible) {
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(2, 2), /*curr_version=*/2),
+ IsFalse());
+
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(2, 3), /*curr_version=*/2),
+ IsFalse());
+}
+
+TEST(VersionUtilTest, ShouldRebuildDerivedFilesUpgrade) {
+ // Unlike other state changes, upgrade depends on the actual "encoded path".
+
+ // kVersionOne -> kVersionTwo
+ EXPECT_THAT(ShouldRebuildDerivedFiles(VersionInfo(kVersionOne, kVersionOne),
+ /*curr_version=*/kVersionTwo),
+ IsFalse());
+}
+
} // namespace
} // namespace version_util
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 3ffb297..467c943 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -592,8 +592,10 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
filesystem_.get(), MakeSchemaDirectoryPath(options_.base_dir()),
version_state_change, version_util::kVersion));
- // Step 2: discard all derived data
- ICING_RETURN_IF_ERROR(DiscardDerivedFiles());
+ // Step 2: discard all derived data if needed rebuild.
+ if (version_util::ShouldRebuildDerivedFiles(version_info)) {
+ ICING_RETURN_IF_ERROR(DiscardDerivedFiles());
+ }
// Step 3: update version file
version_util::VersionInfo new_version_info(
@@ -670,6 +672,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
ICING_ASSIGN_OR_RETURN(
integer_index_,
IntegerIndex::Create(*filesystem_, std::move(integer_index_dir),
+ options_.integer_index_bucket_split_threshold(),
options_.pre_mapping_fbv()));
// Discard qualified id join index directory and instantiate a new one.
@@ -774,11 +777,12 @@ libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
}
ICING_ASSIGN_OR_RETURN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(filesystem_.get(), document_dir, clock_.get(),
- schema_store_.get(),
- force_recovery_and_revalidate_documents,
- options_.document_store_namespace_id_fingerprint(),
- options_.compression_level(), initialize_stats));
+ DocumentStore::Create(
+ filesystem_.get(), document_dir, clock_.get(), schema_store_.get(),
+ force_recovery_and_revalidate_documents,
+ options_.document_store_namespace_id_fingerprint(),
+ options_.pre_mapping_fbv(), options_.use_persistent_hash_map(),
+ options_.compression_level(), initialize_stats));
document_store_ = std::move(create_result.document_store);
return libtextclassifier3::Status::OK;
@@ -825,8 +829,10 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
std::string integer_index_dir =
MakeIntegerIndexWorkingPath(options_.base_dir());
InitializeStatsProto::RecoveryCause integer_index_recovery_cause;
- auto integer_index_or = IntegerIndex::Create(*filesystem_, integer_index_dir,
- options_.pre_mapping_fbv());
+ auto integer_index_or =
+ IntegerIndex::Create(*filesystem_, integer_index_dir,
+ options_.integer_index_bucket_split_threshold(),
+ options_.pre_mapping_fbv());
if (!integer_index_or.ok()) {
ICING_RETURN_IF_ERROR(
IntegerIndex::Discard(*filesystem_, integer_index_dir));
@@ -837,6 +843,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
ICING_ASSIGN_OR_RETURN(
integer_index_,
IntegerIndex::Create(*filesystem_, std::move(integer_index_dir),
+ options_.integer_index_bucket_split_threshold(),
options_.pre_mapping_fbv()));
} else {
// Integer index was created fine.
@@ -2250,8 +2257,7 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
// Copies valid document data to tmp directory
libtextclassifier3::StatusOr<std::vector<DocumentId>>
document_id_old_to_new_or = document_store_->OptimizeInto(
- temporary_document_dir, language_segmenter_.get(),
- options_.document_store_namespace_id_fingerprint(), optimize_stats);
+ temporary_document_dir, language_segmenter_.get(), optimize_stats);
// Handles error if any
if (!document_id_old_to_new_or.ok()) {
@@ -2289,6 +2295,7 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
filesystem_.get(), current_document_dir, clock_.get(),
schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
options_.document_store_namespace_id_fingerprint(),
+ options_.pre_mapping_fbv(), options_.use_persistent_hash_map(),
options_.compression_level(), /*initialize_stats=*/nullptr);
// TODO(b/144458732): Implement a more robust version of
// TC_ASSIGN_OR_RETURN that can support error logging.
@@ -2316,6 +2323,7 @@ IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
filesystem_.get(), current_document_dir, clock_.get(),
schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
options_.document_store_namespace_id_fingerprint(),
+ options_.pre_mapping_fbv(), options_.use_persistent_hash_map(),
options_.compression_level(), /*initialize_stats=*/nullptr);
if (!create_result_or.ok()) {
// Unable to create DocumentStore from the new file. Mark as uninitialized
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index fb44595..354d11c 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -37,6 +37,7 @@
#include "icing/join/join-processor.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
+#include "icing/proto/persist.pb.h"
#include "icing/proto/reset.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
@@ -1214,6 +1215,58 @@ void BM_JoinQueryQualifiedId(benchmark::State& state) {
}
BENCHMARK(BM_JoinQueryQualifiedId);
+void BM_PersistToDisk(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ std::default_random_engine random;
+ int num_types = kAvgNumNamespaces * kAvgNumTypes;
+ ExactStringPropertyGenerator property_generator;
+ SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
+ /*num_properties=*/state.range(1), &property_generator);
+ SchemaProto schema = schema_generator.GenerateSchema(num_types);
+ EvenDistributionTypeSelector type_selector(schema);
+
+ // Generate documents.
+ int num_docs = state.range(0);
+ std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
+ const std::vector<DocumentProto> random_docs =
+ GenerateRandomDocuments(&type_selector, num_docs, language);
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ options.set_use_persistent_hash_map(true);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Reset().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ for (const DocumentProto& doc : random_docs) {
+ ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
+ }
+
+ state.ResumeTiming();
+
+ ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+
+ state.PauseTiming();
+ icing.reset();
+ ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
+ state.ResumeTiming();
+ }
+}
+BENCHMARK(BM_PersistToDisk)
+ // Arguments: num_indexed_documents, num_sections
+ ->ArgPair(1024, 5);
+
} // namespace
} // namespace lib
diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc
index 64bcc98..d6c8de8 100644
--- a/icing/icing-search-engine_initialization_test.cc
+++ b/icing/icing-search-engine_initialization_test.cc
@@ -88,6 +88,7 @@ using ::testing::Eq;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
using ::testing::Matcher;
+using ::testing::Ne;
using ::testing::Return;
using ::testing::SizeIs;
@@ -1060,13 +1061,14 @@ TEST_F(IcingSearchEngineInitializationTest,
// Puts message2 into DocumentStore but doesn't index it.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock,
- schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
@@ -1493,6 +1495,108 @@ TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) {
}
TEST_F(IcingSearchEngineInitializationTest,
+ RecoverFromIntegerIndexBucketSplitThresholdChange) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("indexableInteger")
+ .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ DocumentProto message =
+ DocumentBuilder()
+ .SetKey("namespace", "message/1")
+ .SetSchema("Message")
+ .AddInt64Property("indexableInteger", 123)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // 1. Create an index with a message document.
+ {
+ TestIcingSearchEngine icing(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
+ GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
+ }
+
+ // 2. Create the index again with different
+ // integer_index_bucket_split_threshold. This should trigger index
+ // restoration.
+ {
+ // Mock filesystem to observe and check the behavior of all indices.
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
+ .WillRepeatedly(DoDefault());
+ // Ensure term index directory should never be discarded.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/index_dir")))
+ .Times(0);
+ // Ensure integer index directory should be discarded once, and Clear()
+ // should never be called (i.e. storage sub directory
+ // "*/integer_index_dir/*" should never be discarded) since we start it from
+ // scratch.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
+ .Times(1);
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
+ .Times(0);
+ // Ensure qualified id join index directory should never be discarded, and
+ // Clear() should never be called (i.e. storage sub directory
+ // "*/qualified_id_join_index_dir/*" should never be discarded).
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
+ EndsWith("/qualified_id_join_index_dir")))
+ .Times(0);
+ EXPECT_CALL(
+ *mock_filesystem,
+ DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
+ .Times(0);
+
+ static constexpr int32_t kNewIntegerIndexBucketSplitThreshold = 1000;
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ ASSERT_THAT(kNewIntegerIndexBucketSplitThreshold,
+ Ne(options.integer_index_bucket_split_threshold()));
+ options.set_integer_index_bucket_split_threshold(
+ kNewIntegerIndexBucketSplitThreshold);
+
+ TestIcingSearchEngine icing(options, std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ InitializeResultProto initialize_result = icing.Initialize();
+ ASSERT_THAT(initialize_result.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result.initialize_stats().integer_index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result.initialize_stats()
+ .qualified_id_join_index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Verify integer index works normally
+ SearchSpecProto search_spec;
+ search_spec.set_query("indexableInteger == 123");
+ search_spec.set_search_type(
+ SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
+ search_spec.add_enabled_features(std::string(kNumericSearchFeature));
+
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("message/1"));
+ }
+}
+
+TEST_F(IcingSearchEngineInitializationTest,
RecoverFromCorruptQualifiedIdJoinIndex) {
// Test the following scenario: qualified id join index is corrupted (e.g.
// checksum doesn't match). IcingSearchEngine should be able to recover
@@ -3197,6 +3301,7 @@ TEST_F(IcingSearchEngineInitializationTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem, GetIntegerIndexDir(),
+ /*num_data_threshold_for_bucket_split=*/65536,
/*pre_mapping_fbv=*/false));
// Add hits for document 0.
ASSERT_THAT(integer_index->last_added_document_id(), kInvalidDocumentId);
@@ -3376,6 +3481,7 @@ TEST_F(IcingSearchEngineInitializationTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem, GetIntegerIndexDir(),
+ /*num_data_threshold_for_bucket_split=*/65536,
/*pre_mapping_fbv=*/false));
// Add hits for document 4.
DocumentId original_last_added_doc_id =
@@ -5122,13 +5228,14 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest,
// Put message into DocumentStore
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock,
- schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, document_store->Put(message));
@@ -5142,6 +5249,7 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(*filesystem(), GetIntegerIndexDir(),
+ /*num_data_threshold_for_bucket_split=*/65536,
/*pre_mapping_fbv=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
@@ -5299,12 +5407,8 @@ INSTANTIATE_TEST_SUITE_P(
/*version_in=*/version_util::kVersion + 1,
/*max_version_in=*/version_util::kVersion + 1),
- // Manually change existing data set's version to kVersion - 1 and
- // max_version to kVersion - 1. When initializing, it will detect
- // "upgrade".
- version_util::VersionInfo(
- /*version_in=*/version_util::kVersion - 1,
- /*max_version_in=*/version_util::kVersion - 1),
+ // Currently we don't have any "upgrade" that requires rebuild derived
+ // files, so skip this case until we have a case for it.
// Manually change existing data set's version to kVersion - 1 and
// max_version to kVersion. When initializing, it will detect "roll
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index b6d3c29..9fee925 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -227,6 +227,7 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<NumericIndex<int64_t>> integer_index,
IntegerIndex::Create(filesystem, integer_index_dir,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv=*/true));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
@@ -302,6 +303,7 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<NumericIndex<int64_t>> integer_index,
IntegerIndex::Create(filesystem, integer_index_dir,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv=*/true));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
@@ -378,6 +380,7 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<NumericIndex<int64_t>> integer_index,
IntegerIndex::Create(filesystem, integer_index_dir,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv=*/true));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
@@ -454,6 +457,7 @@ void BM_IndexDocumentWithHiragana(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<NumericIndex<int64_t>> integer_index,
IntegerIndex::Create(filesystem, integer_index_dir,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv=*/true));
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index 9505dbd..c2eaf62 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -172,8 +172,11 @@ class IndexProcessorTest : public Test {
index_, Index::Create(options, &filesystem_, &icing_filesystem_));
ICING_ASSERT_OK_AND_ASSIGN(
- integer_index_, IntegerIndex::Create(filesystem_, integer_index_dir_,
- /*pre_mapping_fbv=*/false));
+ integer_index_,
+ IntegerIndex::Create(
+ filesystem_, integer_index_dir_,
+ IntegerIndex::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/false));
ICING_ASSERT_OK_AND_ASSIGN(
qualified_id_join_index_,
@@ -277,13 +280,14 @@ class IndexProcessorTest : public Test {
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
doc_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/index/integer-section-indexing-handler_test.cc b/icing/index/integer-section-indexing-handler_test.cc
index 96e21ca..91cc06f 100644
--- a/icing/index/integer-section-indexing-handler_test.cc
+++ b/icing/index/integer-section-indexing-handler_test.cc
@@ -106,6 +106,7 @@ class IntegerSectionIndexingHandlerTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
integer_index_,
IntegerIndex::Create(filesystem_, integer_index_working_path_,
+ /*num_data_threshold_for_bucket_split=*/65536,
/*pre_mapping_fbv=*/false));
language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
@@ -169,6 +170,8 @@ class IntegerSectionIndexingHandlerTest : public ::testing::Test {
schema_store_.get(),
/*force_recovery_and_revalidate_documents=*/false,
/*namespace_id_fingerprint=*/false,
+ /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<
DocumentWrapper>::kDeflateCompressionLevel,
/*initialize_stats=*/nullptr));
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
index d8839dc..d93fd02 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
@@ -55,7 +55,8 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
return DocumentStore::Create(
filesystem, base_dir, clock, schema_store,
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
/*initialize_stats=*/nullptr);
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
index df5ddf5..47f5cc5 100644
--- a/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-property-in-schema_test.cc
@@ -97,13 +97,14 @@ class DocHitInfoIteratorPropertyInSchemaTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index c765e6d..47f55aa 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -101,13 +101,14 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
}
diff --git a/icing/index/numeric/dummy-numeric-index.h b/icing/index/numeric/dummy-numeric-index.h
index 2c077a2..34b1acb 100644
--- a/icing/index/numeric/dummy-numeric-index.h
+++ b/icing/index/numeric/dummy-numeric-index.h
@@ -185,19 +185,20 @@ class DummyNumericIndex : public NumericIndex<T> {
PersistentStorage::WorkingPathType::kDummy),
last_added_document_id_(kInvalidDocumentId) {}
- libtextclassifier3::Status PersistStoragesToDisk() override {
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override {
return libtextclassifier3::Status::OK;
}
- libtextclassifier3::Status PersistMetadataToDisk() override {
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override {
return libtextclassifier3::Status::OK;
}
- libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override {
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override {
return Crc32(0);
}
- libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() override {
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override {
return Crc32(0);
}
diff --git a/icing/index/numeric/integer-index-bucket-util.h b/icing/index/numeric/integer-index-bucket-util.h
index 863bd01..d6fc245 100644
--- a/icing/index/numeric/integer-index-bucket-util.h
+++ b/icing/index/numeric/integer-index-bucket-util.h
@@ -61,7 +61,7 @@ struct DataRangeAndBucketInfo {
// - Data slice (i.e. [start, end)) can be empty.
//
// REQUIRES:
-// - original_key_lower <= original_key_upper
+// - original_key_lower < original_key_upper
// - num_data_threshold > 0
// - Keys of all data are in range [original_key_lower, original_key_upper]
//
diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc
index fa62b19..f0212da 100644
--- a/icing/index/numeric/integer-index-storage.cc
+++ b/icing/index/numeric/integer-index-storage.cc
@@ -45,6 +45,7 @@
#include "icing/index/numeric/posting-list-integer-index-serializer.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -311,6 +312,11 @@ libtextclassifier3::Status IntegerIndexStorageIterator::Advance() {
}
bool IntegerIndexStorage::Options::IsValid() const {
+ if (num_data_threshold_for_bucket_split <=
+ kMinNumDataThresholdForBucketSplit) {
+ return false;
+ }
+
if (!HasCustomInitBuckets()) {
return true;
}
@@ -403,12 +409,20 @@ libtextclassifier3::Status IntegerIndexStorage::AddKeys(
return libtextclassifier3::Status::OK;
}
+ SetDirty();
+
std::sort(new_keys.begin(), new_keys.end());
// Dedupe
auto last = std::unique(new_keys.begin(), new_keys.end());
new_keys.erase(last, new_keys.end());
+ if (static_cast<int32_t>(new_keys.size()) >
+ std::numeric_limits<int32_t>::max() - info().num_data) {
+ return absl_ports::ResourceExhaustedError(
+ "# of keys in this integer index storage exceed the limit");
+ }
+
// When adding keys into a bucket, we potentially split it into 2 new buckets
// and one of them will be added into the unsorted bucket array.
// When handling keys belonging to buckets in the unsorted bucket array, we
@@ -649,6 +663,9 @@ libtextclassifier3::Status IntegerIndexStorage::TransferIndex(
return lhs.get() < rhs.get();
});
+ const int32_t num_data_threshold_for_bucket_merge =
+ kNumDataThresholdRatioForBucketMerge *
+ new_storage->options_.num_data_threshold_for_bucket_split;
int64_t curr_key_lower = std::numeric_limits<int64_t>::min();
int64_t curr_key_upper = std::numeric_limits<int64_t>::min();
std::vector<IntegerIndexData> accumulated_data;
@@ -687,7 +704,7 @@ libtextclassifier3::Status IntegerIndexStorage::TransferIndex(
// - Flush accumulated_data and create a new bucket for them.
// - OR merge new_data into accumulated_data and go to the next round.
if (!accumulated_data.empty() && accumulated_data.size() + new_data.size() >
- kNumDataThresholdForBucketMerge) {
+ num_data_threshold_for_bucket_merge) {
// TODO(b/259743562): [Optimization 3] adjust upper bound to fit more data
// from new_data to accumulated_data.
ICING_RETURN_IF_ERROR(FlushDataIntoNewSortedBucket(
@@ -879,9 +896,11 @@ IntegerIndexStorage::InitializeExistingFiles(
IntegerIndexStorage::FlushDataIntoNewSortedBucket(
int64_t key_lower, int64_t key_upper, std::vector<IntegerIndexData>&& data,
IntegerIndexStorage* storage) {
+ storage->SetDirty();
+
if (data.empty()) {
- return storage->sorted_buckets_->Append(
- Bucket(key_lower, key_upper, PostingListIdentifier::kInvalid));
+ return storage->sorted_buckets_->Append(Bucket(
+ key_lower, key_upper, PostingListIdentifier::kInvalid, /*num_data=*/0));
}
ICING_ASSIGN_OR_RETURN(
@@ -891,10 +910,16 @@ IntegerIndexStorage::FlushDataIntoNewSortedBucket(
data.end()));
storage->info().num_data += data.size();
- return storage->sorted_buckets_->Append(Bucket(key_lower, key_upper, pl_id));
+ return storage->sorted_buckets_->Append(
+ Bucket(key_lower, key_upper, pl_id, data.size()));
}
-libtextclassifier3::Status IntegerIndexStorage::PersistStoragesToDisk() {
+libtextclassifier3::Status IntegerIndexStorage::PersistStoragesToDisk(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
ICING_RETURN_IF_ERROR(sorted_buckets_->PersistToDisk());
ICING_RETURN_IF_ERROR(unsorted_buckets_->PersistToDisk());
if (!flash_index_storage_->PersistToDisk()) {
@@ -904,19 +929,35 @@ libtextclassifier3::Status IntegerIndexStorage::PersistStoragesToDisk() {
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status IntegerIndexStorage::PersistMetadataToDisk() {
+libtextclassifier3::Status IntegerIndexStorage::PersistMetadataToDisk(
+ bool force) {
+ // We can skip persisting metadata to disk only if both info and storage are
+ // clean.
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
// Changes should have been applied to the underlying file when using
// MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
// extra safety step to ensure they are written out.
return metadata_mmapped_file_->PersistToDisk();
}
-libtextclassifier3::StatusOr<Crc32> IntegerIndexStorage::ComputeInfoChecksum() {
+libtextclassifier3::StatusOr<Crc32> IntegerIndexStorage::ComputeInfoChecksum(
+ bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
return info().ComputeChecksum();
}
libtextclassifier3::StatusOr<Crc32>
-IntegerIndexStorage::ComputeStoragesChecksum() {
+IntegerIndexStorage::ComputeStoragesChecksum(bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
// Compute crcs
ICING_ASSIGN_OR_RETURN(Crc32 sorted_buckets_crc,
sorted_buckets_->ComputeChecksum());
@@ -933,6 +974,89 @@ IntegerIndexStorage::AddKeysIntoBucketAndSplitIfNecessary(
const std::vector<int64_t>::const_iterator& it_start,
const std::vector<int64_t>::const_iterator& it_end,
FileBackedVector<Bucket>::MutableView& mutable_bucket) {
+ int32_t num_data_in_bucket = mutable_bucket.Get().num_data();
+ int32_t num_new_data = std::distance(it_start, it_end);
+ if (mutable_bucket.Get().key_lower() < mutable_bucket.Get().key_upper() &&
+ num_new_data + num_data_in_bucket >
+ options_.num_data_threshold_for_bucket_split) {
+ // Split bucket.
+
+ // 1. Read all data and free all posting lists.
+ std::vector<IntegerIndexData> all_data;
+ if (mutable_bucket.Get().posting_list_identifier().is_valid()) {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
+ flash_index_storage_.get(), posting_list_serializer_,
+ mutable_bucket.Get().posting_list_identifier()));
+ ICING_ASSIGN_OR_RETURN(all_data, pl_accessor->GetAllDataAndFree());
+ }
+
+ // 2. Append all new data.
+ all_data.reserve(all_data.size() + num_new_data);
+ for (auto it = it_start; it != it_end; ++it) {
+ all_data.push_back(IntegerIndexData(section_id, document_id, *it));
+ }
+
+ // 3. Run bucket splitting algorithm to decide new buckets and dispatch
+ // data.
+ // - # of data in a full bucket =
+ // options_.num_data_threshold_for_bucket_split.
+ // - Bucket splitting logic will be invoked if adding new data
+ // (num_new_data >= 1) into a full bucket.
+ // - In order to achieve good (amortized) time complexity, we want # of
+ // data in new buckets to be around half_of_threshold (i.e.
+ // options_.num_data_threshold_for_bucket_split / 2).
+ // - Using half_of_threshold as the cutoff threshold will cause splitting
+ // buckets with [half_of_threshold, half_of_threshold, num_new_data]
+ // data, which is not ideal because num_new_data is usually small.
+ // - Thus, we pick (half_of_threshold + kNumDataAfterSplitAdjustment) as
+ // the cutoff threshold to avoid over-splitting. It can tolerate
+ // num_new_data up to (2 * kNumDataAfterSplitAdjustment) and
+ // split only 2 buckets (instead of 3) with
+ // [half_of_threshold + kNumDataAfterSplitAdjustment,
+ // half_of_threshold + (kNumDataAfterSplitAdjustment - num_new_data)].
+ int32_t cutoff_threshold =
+ options_.num_data_threshold_for_bucket_split / 2 +
+ kNumDataAfterSplitAdjustment;
+ std::vector<integer_index_bucket_util::DataRangeAndBucketInfo>
+ new_bucket_infos = integer_index_bucket_util::Split(
+ all_data, mutable_bucket.Get().key_lower(),
+ mutable_bucket.Get().key_upper(), cutoff_threshold);
+ if (new_bucket_infos.empty()) {
+ ICING_LOG(WARNING)
+ << "No buckets after splitting. This should not happen.";
+ return absl_ports::InternalError("Split error");
+ }
+
+ // 4. Flush data and create new buckets.
+ std::vector<Bucket> new_buckets;
+ for (int i = 0; i < new_bucket_infos.size(); ++i) {
+ int32_t num_data_in_new_bucket =
+ std::distance(new_bucket_infos[i].start, new_bucket_infos[i].end);
+ ICING_ASSIGN_OR_RETURN(
+ PostingListIdentifier pl_id,
+ FlushDataIntoPostingLists(
+ flash_index_storage_.get(), posting_list_serializer_,
+ new_bucket_infos[i].start, new_bucket_infos[i].end));
+ if (i == 0) {
+ // Reuse mutable_bucket
+ mutable_bucket.Get().set_key_lower(new_bucket_infos[i].key_lower);
+ mutable_bucket.Get().set_key_upper(new_bucket_infos[i].key_upper);
+ mutable_bucket.Get().set_posting_list_identifier(pl_id);
+ mutable_bucket.Get().set_num_data(num_data_in_new_bucket);
+ } else {
+ new_buckets.push_back(Bucket(new_bucket_infos[i].key_lower,
+ new_bucket_infos[i].key_upper, pl_id,
+ num_data_in_new_bucket));
+ }
+ }
+
+ return new_buckets;
+ }
+
+ // Otherwise, we don't need to split bucket. Just simply add all new data into
+ // the bucket.
std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor;
if (mutable_bucket.Get().posting_list_identifier().is_valid()) {
ICING_ASSIGN_OR_RETURN(
@@ -946,68 +1070,6 @@ IntegerIndexStorage::AddKeysIntoBucketAndSplitIfNecessary(
}
for (auto it = it_start; it != it_end; ++it) {
- if (mutable_bucket.Get().key_lower() < mutable_bucket.Get().key_upper() &&
- pl_accessor->WantsSplit()) {
- // If the bucket needs split (max size and full) and is splittable, then
- // we perform bucket splitting.
-
- // 1. Finalize the current posting list accessor.
- PostingListAccessor::FinalizeResult result =
- std::move(*pl_accessor).Finalize();
- if (!result.status.ok()) {
- return result.status;
- }
-
- // 2. Create another posting list accessor instance. Read all data and
- // free all posting lists.
- ICING_ASSIGN_OR_RETURN(
- pl_accessor,
- PostingListIntegerIndexAccessor::CreateFromExisting(
- flash_index_storage_.get(), posting_list_serializer_, result.id));
- ICING_ASSIGN_OR_RETURN(std::vector<IntegerIndexData> all_data,
- pl_accessor->GetAllDataAndFree());
-
- // 3. Append all remaining new data.
- all_data.reserve(all_data.size() + std::distance(it, it_end));
- for (; it != it_end; ++it) {
- all_data.push_back(IntegerIndexData(section_id, document_id, *it));
- }
-
- // 4. Run bucket splitting algorithm to decide new buckets and dispatch
- // data.
- std::vector<integer_index_bucket_util::DataRangeAndBucketInfo>
- new_bucket_infos = integer_index_bucket_util::Split(
- all_data, mutable_bucket.Get().key_lower(),
- mutable_bucket.Get().key_upper(),
- kNumDataThresholdForBucketSplit);
- if (new_bucket_infos.empty()) {
- ICING_LOG(WARNING)
- << "No buckets after splitting. This should not happen.";
- return absl_ports::InternalError("Split error");
- }
-
- // 5. Flush data.
- std::vector<Bucket> new_buckets;
- for (int i = 0; i < new_bucket_infos.size(); ++i) {
- ICING_ASSIGN_OR_RETURN(
- PostingListIdentifier pl_id,
- FlushDataIntoPostingLists(
- flash_index_storage_.get(), posting_list_serializer_,
- new_bucket_infos[i].start, new_bucket_infos[i].end));
- if (i == 0) {
- // Reuse mutable_bucket
- mutable_bucket.Get().set_key_lower(new_bucket_infos[i].key_lower);
- mutable_bucket.Get().set_key_upper(new_bucket_infos[i].key_upper);
- mutable_bucket.Get().set_posting_list_identifier(pl_id);
- } else {
- new_buckets.push_back(Bucket(new_bucket_infos[i].key_lower,
- new_bucket_infos[i].key_upper, pl_id));
- }
- }
-
- return new_buckets;
- }
-
ICING_RETURN_IF_ERROR(pl_accessor->PrependData(
IntegerIndexData(section_id, document_id, *it)));
}
@@ -1022,6 +1084,9 @@ IntegerIndexStorage::AddKeysIntoBucketAndSplitIfNecessary(
}
mutable_bucket.Get().set_posting_list_identifier(result.id);
+ // We've already verified num_new_data won't exceed the limit of the entire
+ // storage, so it is safe to add to the counter of the bucket.
+ mutable_bucket.Get().set_num_data(num_data_in_bucket + num_new_data);
return std::vector<Bucket>();
}
diff --git a/icing/index/numeric/integer-index-storage.h b/icing/index/numeric/integer-index-storage.h
index 9f2e58c..0c1afbb 100644
--- a/icing/index/numeric/integer-index-storage.h
+++ b/icing/index/numeric/integer-index-storage.h
@@ -75,7 +75,7 @@ namespace lib {
class IntegerIndexStorage : public PersistentStorage {
public:
struct Info {
- static constexpr int32_t kMagic = 0xc4bf0ccc;
+ static constexpr int32_t kMagic = 0x6470e547;
int32_t magic;
int32_t num_data;
@@ -99,10 +99,12 @@ class IntegerIndexStorage : public PersistentStorage {
explicit Bucket(int64_t key_lower, int64_t key_upper,
PostingListIdentifier posting_list_identifier =
- PostingListIdentifier::kInvalid)
+ PostingListIdentifier::kInvalid,
+ int32_t num_data = 0)
: key_lower_(key_lower),
key_upper_(key_upper),
- posting_list_identifier_(posting_list_identifier) {}
+ posting_list_identifier_(posting_list_identifier),
+ num_data_(num_data) {}
bool operator<(const Bucket& other) const {
return key_lower_ < other.key_lower_;
@@ -130,12 +132,16 @@ class IntegerIndexStorage : public PersistentStorage {
posting_list_identifier_ = posting_list_identifier;
}
+ int32_t num_data() const { return num_data_; }
+ void set_num_data(int32_t num_data) { num_data_ = num_data; }
+
private:
int64_t key_lower_;
int64_t key_upper_;
PostingListIdentifier posting_list_identifier_;
+ int32_t num_data_;
} __attribute__((packed));
- static_assert(sizeof(Bucket) == 20, "");
+ static_assert(sizeof(Bucket) == 24, "");
static_assert(sizeof(Bucket) == FileBackedVector<Bucket>::kElementTypeSize,
"Bucket type size is inconsistent with FileBackedVector "
"element type size");
@@ -146,15 +152,31 @@ class IntegerIndexStorage : public PersistentStorage {
"Max # of buckets cannot fit into FileBackedVector");
struct Options {
- explicit Options(bool pre_mapping_fbv_in)
- : pre_mapping_fbv(pre_mapping_fbv_in) {}
+ // - According to the benchmark result, the more # of buckets, the higher
+ // latency for range query. Therefore, this number cannot be too small to
+ // avoid splitting bucket too aggressively.
+ // - We use `num_data_threshold_for_bucket_split / 2 + 5` as the cutoff
+ // threshold after splitting. This number cannot be too small (e.g. 10)
+ // because in this case we will have similar # of data in a single bucket
+ // before and after splitting, which contradicts the purpose of splitting.
+ // - For convenience, let's set 64 as the minimum value.
+ static constexpr int32_t kMinNumDataThresholdForBucketSplit = 64;
+
+ explicit Options(int32_t num_data_threshold_for_bucket_split_in,
+ bool pre_mapping_fbv_in)
+ : num_data_threshold_for_bucket_split(
+ num_data_threshold_for_bucket_split_in),
+ pre_mapping_fbv(pre_mapping_fbv_in) {}
explicit Options(std::vector<Bucket> custom_init_sorted_buckets_in,
std::vector<Bucket> custom_init_unsorted_buckets_in,
+ int32_t num_data_threshold_for_bucket_split_in,
bool pre_mapping_fbv_in)
: custom_init_sorted_buckets(std::move(custom_init_sorted_buckets_in)),
custom_init_unsorted_buckets(
std::move(custom_init_unsorted_buckets_in)),
+ num_data_threshold_for_bucket_split(
+ num_data_threshold_for_bucket_split_in),
pre_mapping_fbv(pre_mapping_fbv_in) {}
bool IsValid() const;
@@ -172,6 +194,14 @@ class IntegerIndexStorage : public PersistentStorage {
std::vector<Bucket> custom_init_sorted_buckets;
std::vector<Bucket> custom_init_unsorted_buckets;
+ // Threshold for invoking bucket splitting. If # of data in a bucket exceeds
+ // this number after adding new data, then it will invoke bucket splitting
+ // logic.
+ //
+ // Note: num_data_threshold_for_bucket_split should be >=
+ // kMinNumDataThresholdForBucketSplit.
+ int32_t num_data_threshold_for_bucket_split;
+
// Flag indicating whether memory map max possible file size for underlying
// FileBackedVector before growing the actual file size.
bool pre_mapping_fbv;
@@ -188,28 +218,25 @@ class IntegerIndexStorage : public PersistentStorage {
WorkingPathType::kDirectory;
static constexpr std::string_view kFilePrefix = "integer_index_storage";
- // # of data threshold for bucket merging during optimization (TransferIndex).
- // If total # data of adjacent buckets exceed this value, then flush the
- // accumulated data. Otherwise merge buckets and their data.
- //
- // Calculated by: 0.7 * (kMaxPostingListSize / sizeof(IntegerIndexData)),
- // where kMaxPostingListSize = (kPageSize - sizeof(IndexBlock::BlockHeader)).
- static constexpr int32_t kNumDataThresholdForBucketMerge = 240;
-
- // # of data threshold for bucket splitting during indexing (AddKeys).
- // When the posting list of a bucket is full, we will try to split data into
- // multiple buckets according to their keys. In order to achieve good
- // (amortized) time complexity, we want # of data in new buckets to be at most
- // half # of elements in a full posting list.
+ // Default # of data threshold for bucket splitting during indexing (AddKeys).
+ // When # of data in a bucket reaches this number, we will try to split data
+ // into multiple buckets according to their keys.
+ static constexpr int32_t kDefaultNumDataThresholdForBucketSplit = 65536;
+
+ // # of data threshold for bucket merging during optimization (TransferIndex)
+ // = kNumDataThresholdRatioForBucketMerge *
+ // options.num_data_threshold_for_bucket_split
//
- // Calculated by: 0.5 * (kMaxPostingListSize / sizeof(IntegerIndexData)),
- // where kMaxPostingListSize = (kPageSize - sizeof(IndexBlock::BlockHeader)).
- static constexpr int32_t kNumDataThresholdForBucketSplit = 170;
+ // If total # data of adjacent buckets exceed this threshold, then flush the
+ // accumulated data. Otherwise merge buckets and their data.
+ static constexpr double kNumDataThresholdRatioForBucketMerge = 0.7;
// Length threshold to sort and merge unsorted buckets into sorted buckets. If
// the length of unsorted_buckets exceed the threshold, then call
// SortBuckets().
- static constexpr int32_t kUnsortedBucketsLengthThreshold = 50;
+ // TODO(b/259743562): decide if removing unsorted buckets given that we
+ // changed bucket splitting threshold and # of buckets are small now.
+ static constexpr int32_t kUnsortedBucketsLengthThreshold = 5;
// Creates a new IntegerIndexStorage instance to index integers (for a single
// property). If any of the underlying file is missing, then delete the whole
@@ -272,6 +299,8 @@ class IntegerIndexStorage : public PersistentStorage {
//
// Returns:
// - OK on success
+ // - RESOURCE_EXHAUSTED_ERROR if # of integers in this storage exceed
+ // INT_MAX after adding new_keys
// - Any FileBackedVector or PostingList errors
libtextclassifier3::Status AddKeys(DocumentId document_id,
SectionId section_id,
@@ -314,6 +343,8 @@ class IntegerIndexStorage : public PersistentStorage {
int32_t num_data() const { return info().num_data; }
private:
+ static constexpr int8_t kNumDataAfterSplitAdjustment = 5;
+
explicit IntegerIndexStorage(
const Filesystem& filesystem, std::string&& working_path,
Options&& options,
@@ -329,7 +360,9 @@ class IntegerIndexStorage : public PersistentStorage {
metadata_mmapped_file_(std::move(metadata_mmapped_file)),
sorted_buckets_(std::move(sorted_buckets)),
unsorted_buckets_(std::move(unsorted_buckets)),
- flash_index_storage_(std::move(flash_index_storage)) {}
+ flash_index_storage_(std::move(flash_index_storage)),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
InitializeNewFiles(
@@ -360,20 +393,20 @@ class IntegerIndexStorage : public PersistentStorage {
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistStoragesToDisk() override;
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
// Flushes contents of metadata file.
//
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistMetadataToDisk() override;
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
// Computes and returns Info checksum.
//
// Returns:
// - Crc of the Info on success
- libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override;
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
// Computes and returns all storages checksum. Checksums of sorted_buckets_,
// unsorted_buckets_ will be combined together by XOR.
@@ -382,7 +415,8 @@ class IntegerIndexStorage : public PersistentStorage {
// Returns:
// - Crc of all storages on success
// - INTERNAL_ERROR if any data inconsistency
- libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() override;
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
// Helper function to add keys in range [it_start, it_end) into the given
// bucket. It handles the bucket and its corresponding posting list(s) to make
@@ -442,6 +476,17 @@ class IntegerIndexStorage : public PersistentStorage {
kInfoMetadataFileOffset);
}
+ void SetInfoDirty() { is_info_dirty_ = true; }
+ // When storage is dirty, we have to set info dirty as well. So just expose
+ // SetDirty to set both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
Options options_;
PostingListIntegerIndexSerializer* posting_list_serializer_; // Does not own.
@@ -450,6 +495,9 @@ class IntegerIndexStorage : public PersistentStorage {
std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets_;
std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets_;
std::unique_ptr<FlashIndexStorage> flash_index_storage_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
};
} // namespace lib
diff --git a/icing/index/numeric/integer-index-storage_benchmark.cc b/icing/index/numeric/integer-index-storage_benchmark.cc
index bf5f134..85d381d 100644
--- a/icing/index/numeric/integer-index-storage_benchmark.cc
+++ b/icing/index/numeric/integer-index-storage_benchmark.cc
@@ -68,6 +68,8 @@ using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::SizeIs;
+static constexpr int32_t kNumDataThresholdForBucketSplit =
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit;
static constexpr bool kPreMappingFbv = true;
static constexpr SectionId kDefaultSectionId = 12;
@@ -150,11 +152,13 @@ void BM_Index(benchmark::State& state) {
state.PauseTiming();
benchmark.filesystem.DeleteDirectoryRecursively(
benchmark.working_path.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(
- benchmark.filesystem, benchmark.working_path,
- IntegerIndexStorage::Options(kPreMappingFbv),
- &benchmark.posting_list_serializer));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(kNumDataThresholdForBucketSplit,
+ kPreMappingFbv),
+ &benchmark.posting_list_serializer));
state.ResumeTiming();
for (int i = 0; i < num_keys; ++i) {
@@ -210,11 +214,13 @@ void BM_BatchIndex(benchmark::State& state) {
state.PauseTiming();
benchmark.filesystem.DeleteDirectoryRecursively(
benchmark.working_path.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(
- benchmark.filesystem, benchmark.working_path,
- IntegerIndexStorage::Options(kPreMappingFbv),
- &benchmark.posting_list_serializer));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(kNumDataThresholdForBucketSplit,
+ kPreMappingFbv),
+ &benchmark.posting_list_serializer));
std::vector<int64_t> keys_copy(keys);
state.ResumeTiming();
@@ -263,9 +269,11 @@ void BM_ExactQuery(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(benchmark.filesystem, benchmark.working_path,
- IntegerIndexStorage::Options(kPreMappingFbv),
- &benchmark.posting_list_serializer));
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(kNumDataThresholdForBucketSplit,
+ kPreMappingFbv),
+ &benchmark.posting_list_serializer));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<NumberGenerator<int64_t>> generator,
CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
@@ -340,9 +348,11 @@ void BM_RangeQueryAll(benchmark::State& state) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(benchmark.filesystem, benchmark.working_path,
- IntegerIndexStorage::Options(kPreMappingFbv),
- &benchmark.posting_list_serializer));
+ IntegerIndexStorage::Create(
+ benchmark.filesystem, benchmark.working_path,
+ IntegerIndexStorage::Options(kNumDataThresholdForBucketSplit,
+ kPreMappingFbv),
+ &benchmark.posting_list_serializer));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<NumberGenerator<int64_t>> generator,
CreateIntegerGenerator(distribution_type, kDefaultSeed, num_keys));
diff --git a/icing/index/numeric/integer-index-storage_test.cc b/icing/index/numeric/integer-index-storage_test.cc
index 4d4e665..8675172 100644
--- a/icing/index/numeric/integer-index-storage_test.cc
+++ b/icing/index/numeric/integer-index-storage_test.cc
@@ -30,8 +30,6 @@
#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
#include "icing/file/persistent-storage.h"
-#include "icing/file/posting_list/flash-index-storage.h"
-#include "icing/file/posting_list/index-block.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
@@ -106,7 +104,32 @@ libtextclassifier3::StatusOr<std::vector<DocHitInfo>> Query(
}
TEST_P(IntegerIndexStorageTest, OptionsEmptyCustomInitBucketsShouldBeValid) {
- EXPECT_THAT(Options(/*pre_mapping_fbv_in=*/GetParam()).IsValid(), IsTrue());
+ EXPECT_THAT(
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsTrue());
+}
+
+TEST_P(IntegerIndexStorageTest, OptionsInvalidNumDataThresholdForBucketSplit) {
+ EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ /*num_data_threshold_for_bucket_split=*/-1,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+ EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ /*num_data_threshold_for_bucket_split=*/0,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
+ EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ /*num_data_threshold_for_bucket_split=*/63,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
}
TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsRange) {
@@ -116,6 +139,7 @@ TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsRange) {
{Bucket(std::numeric_limits<int64_t>::min(), 5), Bucket(9, 6)},
/*custom_init_unsorted_buckets_in=*/
{Bucket(10, std::numeric_limits<int64_t>::max())},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam())
.IsValid(),
IsFalse());
@@ -126,6 +150,7 @@ TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsRange) {
{Bucket(10, std::numeric_limits<int64_t>::max())},
/*custom_init_unsorted_buckets_in=*/
{Bucket(std::numeric_limits<int64_t>::min(), 5), Bucket(9, 6)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam())
.IsValid(),
IsFalse());
@@ -138,91 +163,109 @@ TEST_P(IntegerIndexStorageTest,
ASSERT_THAT(valid_posting_list_identifier.is_valid(), IsTrue());
// Invalid custom init sorted bucket
- EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/
- {Bucket(std::numeric_limits<int64_t>::min(),
- std::numeric_limits<int64_t>::max(),
- valid_posting_list_identifier)},
- /*custom_init_unsorted_buckets_in=*/{},
- /*pre_mapping_fbv_in=*/GetParam())
- .IsValid(),
- IsFalse());
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(),
+ std::numeric_limits<int64_t>::max(),
+ valid_posting_list_identifier)},
+ /*custom_init_unsorted_buckets_in=*/{},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
// Invalid custom init unsorted bucket
- EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{},
- /*custom_init_unsorted_buckets_in=*/
- {Bucket(std::numeric_limits<int64_t>::min(),
- std::numeric_limits<int64_t>::max(),
- valid_posting_list_identifier)},
- /*pre_mapping_fbv_in=*/GetParam())
- .IsValid(),
- IsFalse());
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(),
+ std::numeric_limits<int64_t>::max(),
+ valid_posting_list_identifier)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
}
TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsOverlapping) {
// sorted buckets overlap
- EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/
- {Bucket(std::numeric_limits<int64_t>::min(), -100),
- Bucket(-100, std::numeric_limits<int64_t>::max())},
- /*custom_init_unsorted_buckets_in=*/{},
- /*pre_mapping_fbv_in=*/GetParam())
- .IsValid(),
- IsFalse());
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), -100),
+ Bucket(-100, std::numeric_limits<int64_t>::max())},
+ /*custom_init_unsorted_buckets_in=*/{},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
// unsorted buckets overlap
- EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{},
- /*custom_init_unsorted_buckets_in=*/
- {Bucket(-100, std::numeric_limits<int64_t>::max()),
- Bucket(std::numeric_limits<int64_t>::min(), -100)},
- /*pre_mapping_fbv_in=*/GetParam())
- .IsValid(),
- IsFalse());
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(-100, std::numeric_limits<int64_t>::max()),
+ Bucket(std::numeric_limits<int64_t>::min(), -100)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
// Cross buckets overlap
- EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/
- {Bucket(std::numeric_limits<int64_t>::min(), -100),
- Bucket(-99, 0)},
- /*custom_init_unsorted_buckets_in=*/
- {Bucket(200, std::numeric_limits<int64_t>::max()),
- Bucket(0, 50), Bucket(51, 199)},
- /*pre_mapping_fbv_in=*/GetParam())
- .IsValid(),
- IsFalse());
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), -100),
+ Bucket(-99, 0)},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(200, std::numeric_limits<int64_t>::max()), Bucket(0, 50),
+ Bucket(51, 199)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
}
TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsUnion) {
// Missing INT64_MAX
- EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/
- {Bucket(std::numeric_limits<int64_t>::min(), -100),
- Bucket(-99, 0)},
- /*custom_init_unsorted_buckets_in=*/
- {Bucket(1, 1000)}, /*pre_mapping_fbv_in=*/GetParam())
- .IsValid(),
- IsFalse());
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), -100),
+ Bucket(-99, 0)},
+ /*custom_init_unsorted_buckets_in=*/{Bucket(1, 1000)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
// Missing INT64_MIN
- EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/
- {Bucket(-200, -100), Bucket(-99, 0)},
- /*custom_init_unsorted_buckets_in=*/
- {Bucket(1, std::numeric_limits<int64_t>::max())},
- /*pre_mapping_fbv_in=*/GetParam())
- .IsValid(),
- IsFalse());
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(-200, -100), Bucket(-99, 0)},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(1, std::numeric_limits<int64_t>::max())},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
// Missing some intermediate ranges
- EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/
- {Bucket(std::numeric_limits<int64_t>::min(), -100)},
- /*custom_init_unsorted_buckets_in=*/
- {Bucket(1, std::numeric_limits<int64_t>::max())},
- /*pre_mapping_fbv_in=*/GetParam())
- .IsValid(),
- IsFalse());
+ EXPECT_THAT(
+ Options(/*custom_init_sorted_buckets_in=*/
+ {Bucket(std::numeric_limits<int64_t>::min(), -100)},
+ /*custom_init_unsorted_buckets_in=*/
+ {Bucket(1, std::numeric_limits<int64_t>::max())},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam())
+ .IsValid(),
+ IsFalse());
}
TEST_P(IntegerIndexStorageTest, InvalidWorkingPath) {
EXPECT_THAT(
IntegerIndexStorage::Create(
filesystem_, "/dev/null/integer_index_storage_test",
- Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get()),
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
@@ -232,6 +275,7 @@ TEST_P(IntegerIndexStorageTest, CreateWithInvalidOptionsShouldFail) {
/*custom_init_unsorted_buckets_in=*/
{Bucket(-100, std::numeric_limits<int64_t>::max()),
Bucket(std::numeric_limits<int64_t>::min(), -100)},
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam());
ASSERT_THAT(invalid_options.IsValid(), IsFalse());
@@ -246,9 +290,11 @@ TEST_P(IntegerIndexStorageTest, InitializeNewFiles) {
ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
ICING_ASSERT_OK(storage->PersistToDisk());
}
@@ -290,9 +336,11 @@ TEST_P(IntegerIndexStorageTest,
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
// Insert some data.
ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, /*section_id=*/20,
@@ -305,9 +353,11 @@ TEST_P(IntegerIndexStorageTest,
// Without calling PersistToDisk, checksums will not be recomputed or synced
// to disk, so initializing another instance on the same files should fail.
EXPECT_THAT(
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()),
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
@@ -315,9 +365,11 @@ TEST_P(IntegerIndexStorageTest, InitializationShouldSucceedWithPersistToDisk) {
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage1,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
// Insert some data.
ICING_ASSERT_OK(storage1->AddKeys(/*document_id=*/0, /*section_id=*/20,
@@ -339,9 +391,11 @@ TEST_P(IntegerIndexStorageTest, InitializationShouldSucceedWithPersistToDisk) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage2,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
EXPECT_THAT(
Query(storage2.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
/*key_upper=*/std::numeric_limits<int64_t>::max()),
@@ -355,9 +409,11 @@ TEST_P(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) {
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
// Insert some data.
ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, /*section_id=*/20,
@@ -380,9 +436,11 @@ TEST_P(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) {
// we should be able to get the same contents.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
EXPECT_THAT(
Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
/*key_upper=*/std::numeric_limits<int64_t>::max()),
@@ -397,9 +455,11 @@ TEST_P(IntegerIndexStorageTest,
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
/*new_keys=*/{0, 100, -100}));
@@ -428,7 +488,9 @@ TEST_P(IntegerIndexStorageTest,
libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
storage_or = IntegerIndexStorage::Create(
filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get());
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get());
EXPECT_THAT(storage_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(storage_or.status().error_message(),
@@ -442,9 +504,11 @@ TEST_P(IntegerIndexStorageTest,
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
/*new_keys=*/{0, 100, -100}));
@@ -474,7 +538,9 @@ TEST_P(IntegerIndexStorageTest,
libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
storage_or = IntegerIndexStorage::Create(
filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get());
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get());
EXPECT_THAT(storage_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(storage_or.status().error_message(),
@@ -488,9 +554,11 @@ TEST_P(IntegerIndexStorageTest,
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
/*new_keys=*/{0, 100, -100}));
@@ -522,7 +590,9 @@ TEST_P(IntegerIndexStorageTest,
libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
storage_or = IntegerIndexStorage::Create(
filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get());
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get());
EXPECT_THAT(storage_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(storage_or.status().error_message(),
@@ -536,9 +606,11 @@ TEST_P(IntegerIndexStorageTest,
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
/*new_keys=*/{0, 100, -100}));
@@ -572,7 +644,9 @@ TEST_P(IntegerIndexStorageTest,
libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
storage_or = IntegerIndexStorage::Create(
filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get());
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get());
EXPECT_THAT(storage_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(storage_or.status().error_message(),
@@ -586,14 +660,119 @@ TEST_P(IntegerIndexStorageTest, InvalidQuery) {
// Create new integer index storage
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
EXPECT_THAT(
storage->GetIterator(/*query_key_lower=*/0, /*query_key_upper=*/-1),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
+TEST_P(IntegerIndexStorageTest, AddKeysShouldUpdateNumData) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add some keys into buckets [(-1000,-100), (200,300), (-99,-1)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-51, -500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{201, 209, -149}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(storage->num_data(), Eq(6));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ // Check sorted_buckets manually.
+ const std::string sorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".s");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, sorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(sorted_buckets->num_elements(), Eq(5));
+
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk1,
+ sorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(sbk1->key_lower(), Eq(-1000));
+ EXPECT_THAT(sbk1->key_upper(), Eq(-100));
+ EXPECT_THAT(sbk1->num_data(), Eq(2));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk2,
+ sorted_buckets->Get(/*idx=*/1));
+ EXPECT_THAT(sbk2->key_lower(), Eq(0));
+ EXPECT_THAT(sbk2->key_upper(), Eq(100));
+ EXPECT_THAT(sbk2->num_data(), Eq(0));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk3,
+ sorted_buckets->Get(/*idx=*/2));
+ EXPECT_THAT(sbk3->key_lower(), Eq(150));
+ EXPECT_THAT(sbk3->key_upper(), Eq(199));
+ EXPECT_THAT(sbk3->num_data(), Eq(0));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk4,
+ sorted_buckets->Get(/*idx=*/3));
+ EXPECT_THAT(sbk4->key_lower(), Eq(200));
+ EXPECT_THAT(sbk4->key_upper(), Eq(300));
+ EXPECT_THAT(sbk4->num_data(), Eq(3));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* sbk5,
+ sorted_buckets->Get(/*idx=*/4));
+ EXPECT_THAT(sbk5->key_lower(), Eq(301));
+ EXPECT_THAT(sbk5->key_upper(), Eq(999));
+ EXPECT_THAT(sbk5->num_data(), Eq(0));
+
+ // Check unsorted_buckets and unsorted buckets manually.
+ const std::string unsorted_buckets_file_path = absl_ports::StrCat(
+ working_path_, "/", IntegerIndexStorage::kFilePrefix, ".u");
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+ FileBackedVector<Bucket>::Create(
+ filesystem_, unsorted_buckets_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ EXPECT_THAT(unsorted_buckets->num_elements(), Eq(4));
+
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* ubk1,
+ unsorted_buckets->Get(/*idx=*/0));
+ EXPECT_THAT(ubk1->key_lower(), Eq(1000));
+ EXPECT_THAT(ubk1->key_upper(), Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(ubk1->num_data(), Eq(0));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* ubk2,
+ unsorted_buckets->Get(/*idx=*/1));
+ EXPECT_THAT(ubk2->key_lower(), Eq(-99));
+ EXPECT_THAT(ubk2->key_upper(), Eq(-1));
+ EXPECT_THAT(ubk2->num_data(), Eq(1));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* ubk3,
+ unsorted_buckets->Get(/*idx=*/2));
+ EXPECT_THAT(ubk3->key_lower(), Eq(101));
+ EXPECT_THAT(ubk3->key_upper(), Eq(149));
+ EXPECT_THAT(ubk3->num_data(), Eq(0));
+ ICING_ASSERT_OK_AND_ASSIGN(const Bucket* ubk4,
+ unsorted_buckets->Get(/*idx=*/3));
+ EXPECT_THAT(ubk4->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
+ EXPECT_THAT(ubk4->key_upper(), Eq(-1001));
+ EXPECT_THAT(ubk4->num_data(), Eq(0));
+}
+
TEST_P(IntegerIndexStorageTest, ExactQuerySortedBuckets) {
// We use predefined custom buckets to initialize new integer index storage
// and create some test keys accordingly.
@@ -609,6 +788,7 @@ TEST_P(IntegerIndexStorageTest, ExactQuerySortedBuckets) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -664,6 +844,7 @@ TEST_P(IntegerIndexStorageTest, ExactQueryUnsortedBuckets) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -725,6 +906,7 @@ TEST_P(IntegerIndexStorageTest, ExactQueryIdenticalKeys) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -768,6 +950,7 @@ TEST_P(IntegerIndexStorageTest, RangeQueryEmptyIntegerIndexStorage) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -792,6 +975,7 @@ TEST_P(IntegerIndexStorageTest, RangeQuerySingleEntireSortedBucket) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -847,6 +1031,7 @@ TEST_P(IntegerIndexStorageTest, RangeQuerySingleEntireUnsortedBucket) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -905,6 +1090,7 @@ TEST_P(IntegerIndexStorageTest, RangeQuerySinglePartialSortedBucket) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -957,6 +1143,7 @@ TEST_P(IntegerIndexStorageTest, RangeQuerySinglePartialUnsortedBucket) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -1009,6 +1196,7 @@ TEST_P(IntegerIndexStorageTest, RangeQueryMultipleBuckets) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -1098,6 +1286,7 @@ TEST_P(IntegerIndexStorageTest, BatchAdd) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -1126,9 +1315,11 @@ TEST_P(IntegerIndexStorageTest, BatchAdd) {
TEST_P(IntegerIndexStorageTest, BatchAddShouldDedupeKeys) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
std::vector<int64_t> keys = {2, 3, 1, 2, 4, -1, -1, 100, 3};
EXPECT_THAT(
@@ -1152,6 +1343,7 @@ TEST_P(IntegerIndexStorageTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -1188,6 +1380,7 @@ TEST_P(IntegerIndexStorageTest,
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -1236,25 +1429,26 @@ TEST_P(IntegerIndexStorageTest,
}
TEST_P(IntegerIndexStorageTest, SplitBuckets) {
+ int32_t custom_num_data_threshold_for_bucket_split = 300;
+
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
-
- uint32_t block_size = FlashIndexStorage::SelectBlockSize();
- uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
- block_size, serializer_->GetDataTypeBytes());
- uint32_t max_num_data_before_split =
- max_posting_list_bytes / serializer_->GetDataTypeBytes();
-
- // Add max_num_data_before_split + 1 keys to invoke bucket splitting.
- // Keys: max_num_data_before_split to 0
- // Document ids: 0 to max_num_data_before_split
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
+
+ // Add custom_num_data_threshold_for_bucket_split + 1 keys to invoke bucket
+ // splitting.
+ // - Keys: custom_num_data_threshold_for_bucket_split to 0 Document
+ // - ids: 0 to custom_num_data_threshold_for_bucket_split
std::unordered_map<int64_t, DocumentId> data;
- int64_t key = max_num_data_before_split;
+ int64_t key = custom_num_data_threshold_for_bucket_split;
DocumentId document_id = 0;
- for (int i = 0; i < max_num_data_before_split + 1; ++i) {
+ for (int i = 0; i < custom_num_data_threshold_for_bucket_split + 1; ++i) {
data[key] = document_id;
ICING_ASSERT_OK(
storage->AddKeys(document_id, kDefaultSectionId, /*new_keys=*/{key}));
@@ -1299,7 +1493,8 @@ TEST_P(IntegerIndexStorageTest, SplitBuckets) {
// Ensure that search works normally.
std::vector<SectionId> expected_sections = {kDefaultSectionId};
- for (int64_t key = max_num_data_before_split; key >= 0; key--) {
+ for (int64_t key = custom_num_data_threshold_for_bucket_split; key >= 0;
+ key--) {
ASSERT_THAT(data, Contains(Key(key)));
DocumentId expected_document_id = data[key];
EXPECT_THAT(Query(storage.get(), /*key_lower=*/key, /*key_upper=*/key),
@@ -1309,20 +1504,21 @@ TEST_P(IntegerIndexStorageTest, SplitBuckets) {
}
TEST_P(IntegerIndexStorageTest, SplitBucketsTriggerSortBuckets) {
+ int32_t custom_num_data_threshold_for_bucket_split = 300;
+
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
-
- uint32_t block_size = FlashIndexStorage::SelectBlockSize();
- uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
- block_size, serializer_->GetDataTypeBytes());
- uint32_t max_num_data_before_split =
- max_posting_list_bytes / serializer_->GetDataTypeBytes();
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
// Add IntegerIndexStorage::kUnsortedBucketsLengthThreshold keys. For each
- // key, add max_num_data_before_split + 1 data. Then we will get:
+ // key, add custom_num_data_threshold_for_bucket_split + 1 data. Then we will
+ // get:
// - Bucket splitting will create kUnsortedBucketsLengthThreshold + 1 unsorted
// buckets [[50, 50], [49, 49], ..., [1, 1], [51, INT64_MAX]].
// - Since there are kUnsortedBucketsLengthThreshold + 1 unsorted buckets, we
@@ -1332,7 +1528,7 @@ TEST_P(IntegerIndexStorageTest, SplitBucketsTriggerSortBuckets) {
DocumentId document_id = 0;
for (int i = 0; i < IntegerIndexStorage::kUnsortedBucketsLengthThreshold;
++i) {
- for (int j = 0; j < max_num_data_before_split + 1; ++j) {
+ for (int j = 0; j < custom_num_data_threshold_for_bucket_split + 1; ++j) {
data[key].push_back(document_id);
ICING_ASSERT_OK(
storage->AddKeys(document_id, kDefaultSectionId, /*new_keys=*/{key}));
@@ -1396,6 +1592,7 @@ TEST_P(IntegerIndexStorageTest, TransferIndex) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -1433,9 +1630,11 @@ TEST_P(IntegerIndexStorageTest, TransferIndex) {
{
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> new_storage,
- IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp",
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
EXPECT_THAT(
storage->TransferIndex(document_id_old_to_new, new_storage.get()),
IsOk());
@@ -1445,9 +1644,11 @@ TEST_P(IntegerIndexStorageTest, TransferIndex) {
// Verify after transferring and reinitializing the instance.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> new_storage,
- IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp",
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
std::vector<SectionId> expected_sections = {kDefaultSectionId};
EXPECT_THAT(new_storage->num_data(), Eq(7));
@@ -1493,9 +1694,11 @@ TEST_P(IntegerIndexStorageTest, TransferIndex) {
TEST_P(IntegerIndexStorageTest, TransferIndexOutOfRangeDocumentId) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(filesystem_, working_path_,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_,
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
/*new_keys=*/{120}));
@@ -1510,9 +1713,11 @@ TEST_P(IntegerIndexStorageTest, TransferIndexOutOfRangeDocumentId) {
// Transfer to new storage.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> new_storage,
- IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp",
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
EXPECT_THAT(storage->TransferIndex(document_id_old_to_new, new_storage.get()),
IsOk());
@@ -1542,6 +1747,7 @@ TEST_P(IntegerIndexStorageTest, TransferEmptyIndex) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
ASSERT_THAT(storage->num_data(), Eq(0));
@@ -1552,9 +1758,11 @@ TEST_P(IntegerIndexStorageTest, TransferEmptyIndex) {
// Transfer to new storage.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> new_storage,
- IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp",
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
EXPECT_THAT(storage->TransferIndex(document_id_old_to_new, new_storage.get()),
IsOk());
@@ -1581,6 +1789,7 @@ TEST_P(IntegerIndexStorageTest, TransferIndexDeleteAll) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -1605,9 +1814,11 @@ TEST_P(IntegerIndexStorageTest, TransferIndexDeleteAll) {
{
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> new_storage,
- IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp",
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
EXPECT_THAT(
storage->TransferIndex(document_id_old_to_new, new_storage.get()),
IsOk());
@@ -1617,9 +1828,11 @@ TEST_P(IntegerIndexStorageTest, TransferIndexDeleteAll) {
// Verify after transferring and reinitializing the instance.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> new_storage,
- IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp",
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, working_path_ + "_temp",
+ Options(IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
std::vector<SectionId> expected_sections = {kDefaultSectionId};
EXPECT_THAT(new_storage->num_data(), Eq(0));
@@ -1630,6 +1843,11 @@ TEST_P(IntegerIndexStorageTest, TransferIndexDeleteAll) {
}
TEST_P(IntegerIndexStorageTest, TransferIndexShouldInvokeMergeBuckets) {
+ int32_t custom_num_data_threshold_for_bucket_split = 300;
+ int32_t custom_num_data_threshold_for_bucket_merge =
+ IntegerIndexStorage::kNumDataThresholdRatioForBucketMerge *
+ custom_num_data_threshold_for_bucket_split;
+
// This test verifies that if TransferIndex invokes bucket merging logic to
// ensure sure we're able to avoid having mostly empty buckets after inserting
// and deleting data for many rounds.
@@ -1648,6 +1866,7 @@ TEST_P(IntegerIndexStorageTest, TransferIndexShouldInvokeMergeBuckets) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ custom_num_data_threshold_for_bucket_split,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
@@ -1671,7 +1890,7 @@ TEST_P(IntegerIndexStorageTest, TransferIndexShouldInvokeMergeBuckets) {
/*new_keys=*/{20}));
ASSERT_THAT(storage->num_data(), Eq(9));
ASSERT_THAT(storage->num_data(),
- Le(IntegerIndexStorage::kNumDataThresholdForBucketMerge));
+ Le(custom_num_data_threshold_for_bucket_merge));
// Create document_id_old_to_new that keeps all existing documents.
std::vector<DocumentId> document_id_old_to_new(9);
@@ -1683,12 +1902,17 @@ TEST_P(IntegerIndexStorageTest, TransferIndexShouldInvokeMergeBuckets) {
{
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> new_storage,
- IntegerIndexStorage::Create(filesystem_, new_storage_working_path,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, new_storage_working_path,
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
EXPECT_THAT(
storage->TransferIndex(document_id_old_to_new, new_storage.get()),
IsOk());
+ EXPECT_THAT(new_storage->num_data(), Eq(9));
}
// Check new_storage->sorted_bucket_ manually.
@@ -1704,9 +1928,15 @@ TEST_P(IntegerIndexStorageTest, TransferIndexShouldInvokeMergeBuckets) {
ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bk1, sorted_buckets->Get(/*idx=*/0));
EXPECT_THAT(bk1->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
EXPECT_THAT(bk1->key_upper(), Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(bk1->num_data(), Eq(9));
}
TEST_P(IntegerIndexStorageTest, TransferIndexExceedsMergeThreshold) {
+ int32_t custom_num_data_threshold_for_bucket_split = 300;
+ int32_t custom_num_data_threshold_for_bucket_merge =
+ IntegerIndexStorage::kNumDataThresholdRatioForBucketMerge *
+ custom_num_data_threshold_for_bucket_split;
+
// This test verifies that if TransferIndex invokes bucket merging logic and
// doesn't merge buckets too aggressively to ensure we won't get a bucket with
// too many data.
@@ -1725,15 +1955,16 @@ TEST_P(IntegerIndexStorageTest, TransferIndexExceedsMergeThreshold) {
filesystem_, working_path_,
Options(std::move(custom_init_sorted_buckets),
std::move(custom_init_unsorted_buckets),
+ custom_num_data_threshold_for_bucket_split,
/*pre_mapping_fbv_in=*/GetParam()),
serializer_.get()));
// Insert data into 2 buckets so that total # of these 2 buckets exceed
- // kNumDataThresholdForBucketMerge.
+ // custom_num_data_threshold_for_bucket_merge.
// - Bucket 1: [-1000, -100]
// - Bucket 2: [101, 149]
DocumentId document_id = 0;
- int num_data_for_bucket1 = 200;
+ int num_data_for_bucket1 = custom_num_data_threshold_for_bucket_merge - 50;
for (int i = 0; i < num_data_for_bucket1; ++i) {
ICING_ASSERT_OK(storage->AddKeys(document_id, kDefaultSectionId,
/*new_keys=*/{-200}));
@@ -1747,8 +1978,10 @@ TEST_P(IntegerIndexStorageTest, TransferIndexExceedsMergeThreshold) {
++document_id;
}
+ ASSERT_THAT(storage->num_data(),
+ Eq(num_data_for_bucket1 + num_data_for_bucket2));
ASSERT_THAT(num_data_for_bucket1 + num_data_for_bucket2,
- Gt(IntegerIndexStorage::kNumDataThresholdForBucketMerge));
+ Gt(custom_num_data_threshold_for_bucket_merge));
// Create document_id_old_to_new that keeps all existing documents.
std::vector<DocumentId> document_id_old_to_new(document_id);
@@ -1760,12 +1993,18 @@ TEST_P(IntegerIndexStorageTest, TransferIndexExceedsMergeThreshold) {
{
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndexStorage> new_storage,
- IntegerIndexStorage::Create(filesystem_, new_storage_working_path,
- Options(/*pre_mapping_fbv_in=*/GetParam()),
- serializer_.get()));
+ IntegerIndexStorage::Create(
+ filesystem_, new_storage_working_path,
+ Options(/*custom_init_sorted_buckets_in=*/{},
+ /*custom_init_unsorted_buckets_in=*/{},
+ custom_num_data_threshold_for_bucket_split,
+ /*pre_mapping_fbv_in=*/GetParam()),
+ serializer_.get()));
EXPECT_THAT(
storage->TransferIndex(document_id_old_to_new, new_storage.get()),
IsOk());
+ EXPECT_THAT(new_storage->num_data(),
+ Eq(num_data_for_bucket1 + num_data_for_bucket2));
}
// Check new_storage->sorted_bucket_ manually.
@@ -1781,9 +2020,11 @@ TEST_P(IntegerIndexStorageTest, TransferIndexExceedsMergeThreshold) {
ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bk1, sorted_buckets->Get(/*idx=*/0));
EXPECT_THAT(bk1->key_lower(), Eq(std::numeric_limits<int64_t>::min()));
EXPECT_THAT(bk1->key_upper(), Eq(100));
+ EXPECT_THAT(bk1->num_data(), Eq(num_data_for_bucket1));
ICING_ASSERT_OK_AND_ASSIGN(const Bucket* bk2, sorted_buckets->Get(/*idx=*/1));
EXPECT_THAT(bk2->key_lower(), Eq(101));
EXPECT_THAT(bk2->key_upper(), Eq(std::numeric_limits<int64_t>::max()));
+ EXPECT_THAT(bk2->num_data(), Eq(num_data_for_bucket2));
}
INSTANTIATE_TEST_SUITE_P(IntegerIndexStorageTest, IntegerIndexStorageTest,
diff --git a/icing/index/numeric/integer-index.cc b/icing/index/numeric/integer-index.cc
index 5fa82a5..b2fe159 100644
--- a/icing/index/numeric/integer-index.cc
+++ b/icing/index/numeric/integer-index.cc
@@ -91,7 +91,7 @@ libtextclassifier3::StatusOr<IntegerIndex::PropertyToStorageMapType>
GetPropertyIntegerIndexStorageMap(
const Filesystem& filesystem, const std::string& working_path,
PostingListIntegerIndexSerializer* posting_list_serializer,
- bool pre_mapping_fbv) {
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
ICING_ASSIGN_OR_RETURN(std::vector<std::string> property_paths,
GetAllExistingPropertyPaths(filesystem, working_path));
@@ -102,11 +102,13 @@ GetPropertyIntegerIndexStorageMap(
}
std::string storage_working_path =
GetPropertyIndexStoragePath(working_path, property_path);
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<IntegerIndexStorage> storage,
- IntegerIndexStorage::Create(
- filesystem, storage_working_path,
- IntegerIndexStorage::Options(pre_mapping_fbv),
- posting_list_serializer));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem, storage_working_path,
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
+ pre_mapping_fbv),
+ posting_list_serializer));
property_to_storage_map.insert(
std::make_pair(property_path, std::move(storage)));
}
@@ -141,6 +143,8 @@ libtextclassifier3::StatusOr<std::unordered_set<std::string>> CreatePropertySet(
} // namespace
libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && {
+ integer_index_.SetDirty();
+
auto iter = integer_index_.property_to_storage_map_.find(property_path_);
IntegerIndexStorage* target_storage = nullptr;
// 1. Check if this property already has its own individual index.
@@ -161,7 +165,8 @@ libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && {
integer_index_.filesystem_,
GetPropertyIndexStoragePath(integer_index_.working_path_,
kWildcardPropertyIndexFileName),
- IntegerIndexStorage::Options(pre_mapping_fbv_),
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_),
integer_index_.posting_list_serializer_.get()));
}
ICING_RETURN_IF_ERROR(
@@ -175,7 +180,8 @@ libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && {
integer_index_.filesystem_,
GetPropertyIndexStoragePath(integer_index_.working_path_,
property_path_),
- IntegerIndexStorage::Options(pre_mapping_fbv_),
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_),
integer_index_.posting_list_serializer_.get()));
target_storage = new_storage.get();
integer_index_.property_to_storage_map_.insert(
@@ -188,6 +194,7 @@ libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && {
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
IntegerIndex::Create(const Filesystem& filesystem, std::string working_path,
+ int32_t num_data_threshold_for_bucket_split,
bool pre_mapping_fbv) {
if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str())) {
// Discard working_path if metadata file is missing, and reinitialize.
@@ -195,9 +202,11 @@ IntegerIndex::Create(const Filesystem& filesystem, std::string working_path,
ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
}
return InitializeNewFiles(filesystem, std::move(working_path),
+ num_data_threshold_for_bucket_split,
pre_mapping_fbv);
}
return InitializeExistingFiles(filesystem, std::move(working_path),
+ num_data_threshold_for_bucket_split,
pre_mapping_fbv);
}
@@ -239,6 +248,8 @@ IntegerIndex::GetIterator(std::string_view property_path, int64_t key_lower,
libtextclassifier3::Status IntegerIndex::AddPropertyToWildcardStorage(
const std::string& property_path) {
+ SetDirty();
+
WildcardPropertyStorage wildcard_properties;
wildcard_properties.mutable_property_entries()->Reserve(
wildcard_properties_set_.size());
@@ -272,7 +283,8 @@ libtextclassifier3::Status IntegerIndex::Optimize(
// we can safely swap directories later.
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<IntegerIndex> new_integer_index,
- Create(filesystem_, temp_working_path_ddir.dir(), pre_mapping_fbv_));
+ Create(filesystem_, temp_working_path_ddir.dir(),
+ num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
ICING_RETURN_IF_ERROR(
TransferIndex(document_id_old_to_new, new_integer_index.get()));
new_integer_index->set_last_added_document_id(new_last_added_document_id);
@@ -322,20 +334,24 @@ libtextclassifier3::Status IntegerIndex::Optimize(
filesystem_,
GetPropertyIndexStoragePath(working_path_,
kWildcardPropertyIndexFileName),
- IntegerIndexStorage::Options(pre_mapping_fbv_),
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_),
posting_list_serializer_.get()));
}
// Initialize all existing integer index storages.
- ICING_ASSIGN_OR_RETURN(property_to_storage_map_,
- GetPropertyIntegerIndexStorageMap(
- filesystem_, working_path_,
- posting_list_serializer_.get(), pre_mapping_fbv_));
+ ICING_ASSIGN_OR_RETURN(
+ property_to_storage_map_,
+ GetPropertyIntegerIndexStorageMap(
+ filesystem_, working_path_, posting_list_serializer_.get(),
+ num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
return libtextclassifier3::Status::OK;
}
libtextclassifier3::Status IntegerIndex::Clear() {
+ SetDirty();
+
// Step 1: clear property_to_storage_map_.
property_to_storage_map_.clear();
wildcard_index_storage_.reset();
@@ -367,6 +383,7 @@ libtextclassifier3::Status IntegerIndex::Clear() {
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
IntegerIndex::InitializeNewFiles(const Filesystem& filesystem,
std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split,
bool pre_mapping_fbv) {
// Create working directory.
if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
@@ -399,12 +416,14 @@ IntegerIndex::InitializeNewFiles(const Filesystem& filesystem,
std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
/*property_to_storage_map=*/{}, std::move(wildcard_property_storage),
/*wildcard_properties_set=*/{}, /*wildcard_index_storage=*/nullptr,
- pre_mapping_fbv));
+ num_data_threshold_for_bucket_split, pre_mapping_fbv));
// Initialize info content by writing mapped memory directly.
Info& info_ref = new_integer_index->info();
info_ref.magic = Info::kMagic;
info_ref.last_added_document_id = kInvalidDocumentId;
+ info_ref.num_data_threshold_for_bucket_split =
+ num_data_threshold_for_bucket_split;
// Initialize new PersistentStorage. The initial checksums will be computed
// and set via InitializeNewStorage.
ICING_RETURN_IF_ERROR(new_integer_index->InitializeNewStorage());
@@ -413,9 +432,9 @@ IntegerIndex::InitializeNewFiles(const Filesystem& filesystem,
}
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
-IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem,
- std::string&& working_path,
- bool pre_mapping_fbv) {
+IntegerIndex::InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
// Mmap the content of the crcs and info.
ICING_ASSIGN_OR_RETURN(
MemoryMappedFile metadata_mmapped_file,
@@ -432,10 +451,11 @@ IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem,
std::make_unique<PostingListIntegerIndexSerializer>();
// Initialize all existing integer index storages.
- ICING_ASSIGN_OR_RETURN(PropertyToStorageMapType property_to_storage_map,
- GetPropertyIntegerIndexStorageMap(
- filesystem, working_path,
- posting_list_serializer.get(), pre_mapping_fbv));
+ ICING_ASSIGN_OR_RETURN(
+ PropertyToStorageMapType property_to_storage_map,
+ GetPropertyIntegerIndexStorageMap(
+ filesystem, working_path, posting_list_serializer.get(),
+ num_data_threshold_for_bucket_split, pre_mapping_fbv));
std::string wildcard_property_path =
GetWildcardPropertyStorageFilePath(working_path);
@@ -455,7 +475,8 @@ IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem,
filesystem,
GetPropertyIndexStoragePath(working_path,
kWildcardPropertyIndexFileName),
- IntegerIndexStorage::Options(pre_mapping_fbv),
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
+ pre_mapping_fbv),
posting_list_serializer.get()));
}
@@ -465,7 +486,7 @@ IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem,
std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
std::move(property_to_storage_map), std::move(wildcard_property_storage),
std::move(wildcard_properties_set), std::move(wildcard_index_storage),
- pre_mapping_fbv));
+ num_data_threshold_for_bucket_split, pre_mapping_fbv));
// Initialize existing PersistentStorage. Checksums will be validated.
ICING_RETURN_IF_ERROR(integer_index->InitializeExistingStorage());
@@ -474,6 +495,14 @@ IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem,
return absl_ports::FailedPreconditionError("Incorrect magic value");
}
+ // If num_data_threshold_for_bucket_split mismatches, then return error to let
+ // caller rebuild.
+ if (integer_index->info().num_data_threshold_for_bucket_split !=
+ num_data_threshold_for_bucket_split) {
+ return absl_ports::FailedPreconditionError(
+ "Mismatch num_data_threshold_for_bucket_split");
+ }
+
return integer_index;
}
@@ -488,7 +517,8 @@ IntegerIndex::TransferIntegerIndexStorage(
std::unique_ptr<IntegerIndexStorage> new_storage,
IntegerIndexStorage::Create(
new_integer_index->filesystem_, new_storage_working_path,
- IntegerIndexStorage::Options(pre_mapping_fbv_),
+ IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_),
new_integer_index->posting_list_serializer_.get()));
ICING_RETURN_IF_ERROR(
@@ -552,7 +582,11 @@ libtextclassifier3::Status IntegerIndex::TransferIndex(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status IntegerIndex::PersistStoragesToDisk() {
+libtextclassifier3::Status IntegerIndex::PersistStoragesToDisk(bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
for (auto& [_, storage] : property_to_storage_map_) {
ICING_RETURN_IF_ERROR(storage->PersistToDisk());
}
@@ -564,18 +598,32 @@ libtextclassifier3::Status IntegerIndex::PersistStoragesToDisk() {
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status IntegerIndex::PersistMetadataToDisk() {
+libtextclassifier3::Status IntegerIndex::PersistMetadataToDisk(bool force) {
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
// Changes should have been applied to the underlying file when using
// MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
// extra safety step to ensure they are written out.
return metadata_mmapped_file_->PersistToDisk();
}
-libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeInfoChecksum() {
+libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeInfoChecksum(
+ bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
return info().ComputeChecksum();
}
-libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeStoragesChecksum() {
+libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeStoragesChecksum(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
// XOR all crcs of all storages. Since XOR is commutative and associative,
// the order doesn't matter.
uint32_t storages_checksum = 0;
diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h
index 30f9852..e7a3127 100644
--- a/icing/index/numeric/integer-index.h
+++ b/icing/index/numeric/integer-index.h
@@ -55,25 +55,29 @@ class IntegerIndex : public NumericIndex<int64_t> {
// 'wildcard' storage.
static constexpr int kMaxPropertyStorages = 32;
+ static constexpr int32_t kDefaultNumDataThresholdForBucketSplit =
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit;
+
struct Info {
- static constexpr int32_t kMagic = 0x238a3dcb;
+ static constexpr int32_t kMagic = 0x5d8a1e8a;
int32_t magic;
DocumentId last_added_document_id;
+ int32_t num_data_threshold_for_bucket_split;
Crc32 ComputeChecksum() const {
return Crc32(
std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
}
} __attribute__((packed));
- static_assert(sizeof(Info) == 8, "");
+ static_assert(sizeof(Info) == 12, "");
// Metadata file layout: <Crcs><Info>
static constexpr int32_t kCrcsMetadataFileOffset = 0;
static constexpr int32_t kInfoMetadataFileOffset =
static_cast<int32_t>(sizeof(Crcs));
static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
- static_assert(kMetadataFileSize == 20, "");
+ static_assert(kMetadataFileSize == 24, "");
static constexpr WorkingPathType kWorkingPathType =
WorkingPathType::kDirectory;
@@ -90,6 +94,8 @@ class IntegerIndex : public NumericIndex<int64_t> {
// related files will be stored under this directory. See
// PersistentStorage for more details about the concept of
// working_path.
+ // num_data_threshold_for_bucket_split: see IntegerIndexStorage::Options for
+ // more details.
// pre_mapping_fbv: flag indicating whether memory map max possible file size
// for underlying FileBackedVector before growing the actual
// file size.
@@ -101,7 +107,7 @@ class IntegerIndex : public NumericIndex<int64_t> {
// - Any FileBackedVector/MemoryMappedFile errors.
static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> Create(
const Filesystem& filesystem, std::string working_path,
- bool pre_mapping_fbv);
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv);
// Deletes IntegerIndex under working_path.
//
@@ -122,7 +128,8 @@ class IntegerIndex : public NumericIndex<int64_t> {
std::string_view property_path, DocumentId document_id,
SectionId section_id) override {
return std::make_unique<Editor>(property_path, document_id, section_id,
- *this, pre_mapping_fbv_);
+ *this, num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_);
}
// Returns a DocHitInfoIterator for iterating through all docs which have the
@@ -172,6 +179,8 @@ class IntegerIndex : public NumericIndex<int64_t> {
}
void set_last_added_document_id(DocumentId document_id) override {
+ SetInfoDirty();
+
Info& info_ref = info();
if (info_ref.last_added_document_id == kInvalidDocumentId ||
document_id > info_ref.last_added_document_id) {
@@ -189,9 +198,12 @@ class IntegerIndex : public NumericIndex<int64_t> {
public:
explicit Editor(std::string_view property_path, DocumentId document_id,
SectionId section_id, IntegerIndex& integer_index,
+ int32_t num_data_threshold_for_bucket_split,
bool pre_mapping_fbv)
: NumericIndex<int64_t>::Editor(property_path, document_id, section_id),
integer_index_(integer_index),
+ num_data_threshold_for_bucket_split_(
+ num_data_threshold_for_bucket_split),
pre_mapping_fbv_(pre_mapping_fbv) {}
~Editor() override = default;
@@ -211,6 +223,8 @@ class IntegerIndex : public NumericIndex<int64_t> {
IntegerIndex& integer_index_; // Does not own.
+ int32_t num_data_threshold_for_bucket_split_;
+
// Flag indicating whether memory map max possible file size for underlying
// FileBackedVector before growing the actual file size.
bool pre_mapping_fbv_;
@@ -226,7 +240,7 @@ class IntegerIndex : public NumericIndex<int64_t> {
wildcard_property_storage,
std::unordered_set<std::string> wildcard_properties_set,
std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage,
- bool pre_mapping_fbv)
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv)
: NumericIndex<int64_t>(filesystem, std::move(working_path),
kWorkingPathType),
posting_list_serializer_(std::move(posting_list_serializer)),
@@ -235,15 +249,22 @@ class IntegerIndex : public NumericIndex<int64_t> {
wildcard_property_storage_(std::move(wildcard_property_storage)),
wildcard_properties_set_(std::move(wildcard_properties_set)),
wildcard_index_storage_(std::move(wildcard_index_storage)),
- pre_mapping_fbv_(pre_mapping_fbv) {}
+ num_data_threshold_for_bucket_split_(
+ num_data_threshold_for_bucket_split),
+ pre_mapping_fbv_(pre_mapping_fbv),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split,
bool pre_mapping_fbv);
static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeExistingFiles(const Filesystem& filesystem,
- std::string&& working_path, bool pre_mapping_fbv);
+ std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split,
+ bool pre_mapping_fbv);
// Adds the property path to the list of properties using wildcard storage.
// This will both update the in-memory list (wildcard_properties_set_) and
@@ -296,20 +317,20 @@ class IntegerIndex : public NumericIndex<int64_t> {
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistStoragesToDisk() override;
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
// Flushes contents of metadata file.
//
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistMetadataToDisk() override;
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
// Computes and returns Info checksum.
//
// Returns:
// - Crc of the Info on success
- libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override;
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
// Computes and returns all storages checksum. Checksums of (storage_crc,
// property_path) for all existing property paths will be combined together by
@@ -318,7 +339,8 @@ class IntegerIndex : public NumericIndex<int64_t> {
// Returns:
// - Crc of all storages on success
// - INTERNAL_ERROR if any data inconsistency
- libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() override;
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
Crcs& crcs() override {
return *reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
@@ -340,6 +362,17 @@ class IntegerIndex : public NumericIndex<int64_t> {
kInfoMetadataFileOffset);
}
+ void SetInfoDirty() { is_info_dirty_ = true; }
+ // When storage is dirty, we have to set info dirty as well. So just expose
+ // SetDirty to set both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
std::unique_ptr<PostingListIntegerIndexSerializer> posting_list_serializer_;
std::unique_ptr<MemoryMappedFile> metadata_mmapped_file_;
@@ -360,9 +393,14 @@ class IntegerIndex : public NumericIndex<int64_t> {
// kMaxPropertyStorages in property_to_storage_map.
std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage_;
+ int32_t num_data_threshold_for_bucket_split_;
+
// Flag indicating whether memory map max possible file size for underlying
// FileBackedVector before growing the actual file size.
bool pre_mapping_fbv_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
};
} // namespace lib
diff --git a/icing/index/numeric/integer-index_test.cc b/icing/index/numeric/integer-index_test.cc
index 8a7acb9..b2e3fbe 100644
--- a/icing/index/numeric/integer-index_test.cc
+++ b/icing/index/numeric/integer-index_test.cc
@@ -83,13 +83,14 @@ class NumericIndexIntegerTest : public ::testing::Test {
filesystem_.CreateDirectoryRecursively(document_store_dir.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult doc_store_create_result,
- DocumentStore::Create(&filesystem_, document_store_dir, &clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, document_store_dir, &clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
doc_store_ = std::move(doc_store_create_result.document_store);
}
@@ -114,8 +115,10 @@ class NumericIndexIntegerTest : public ::testing::Test {
template <>
libtextclassifier3::StatusOr<std::unique_ptr<NumericIndex<int64_t>>>
CreateIntegerIndex<IntegerIndex>() {
- return IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/false);
+ return IntegerIndex::Create(
+ filesystem_, working_path_, /*num_data_threshold_for_bucket_split=*/
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit,
+ /*pre_mapping_fbv=*/false);
}
template <typename NotIntegerIndexType>
@@ -138,8 +141,7 @@ class NumericIndexIntegerTest : public ::testing::Test {
}
ICING_ASSIGN_OR_RETURN(
std::vector<DocumentId> docid_map,
- doc_store_->OptimizeInto(document_store_compact_dir, nullptr,
- /*namespace_id_fingerprint=*/false));
+ doc_store_->OptimizeInto(document_store_compact_dir, nullptr));
doc_store_.reset();
if (!filesystem_.SwapFiles(document_store_dir.c_str(),
@@ -153,13 +155,14 @@ class NumericIndexIntegerTest : public ::testing::Test {
ICING_ASSIGN_OR_RETURN(
DocumentStore::CreateResult doc_store_create_result,
- DocumentStore::Create(&filesystem_, document_store_dir, &clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, document_store_dir, &clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
doc_store_ = std::move(doc_store_create_result.document_store);
return docid_map;
}
@@ -1126,14 +1129,28 @@ TYPED_TEST(NumericIndexIntegerTest, Clear) {
/*document_id=*/4, std::vector<SectionId>{kDefaultSectionId}))));
}
+struct IntegerIndexTestParam {
+ int32_t num_data_threshold_for_bucket_split;
+ bool pre_mapping_fbv;
+
+ explicit IntegerIndexTestParam(int32_t num_data_threshold_for_bucket_split_in,
+ bool pre_mapping_fbv_in)
+ : num_data_threshold_for_bucket_split(
+ num_data_threshold_for_bucket_split_in),
+ pre_mapping_fbv(pre_mapping_fbv_in) {}
+};
+
// Tests for persistent integer index only
-class IntegerIndexTest : public NumericIndexIntegerTest<IntegerIndex>,
- public ::testing::WithParamInterface<bool> {};
+class IntegerIndexTest
+ : public NumericIndexIntegerTest<IntegerIndex>,
+ public ::testing::WithParamInterface<IntegerIndexTestParam> {};
TEST_P(IntegerIndexTest, InvalidWorkingPath) {
- EXPECT_THAT(IntegerIndex::Create(filesystem_, "/dev/null/integer_index_test",
- /*pre_mapping_fbv=*/GetParam()),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ EXPECT_THAT(
+ IntegerIndex::Create(filesystem_, "/dev/null/integer_index_test",
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
TEST_P(IntegerIndexTest, InitializeNewFiles) {
@@ -1142,7 +1159,8 @@ TEST_P(IntegerIndexTest, InitializeNewFiles) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
ICING_ASSERT_OK(integer_index->PersistToDisk());
}
@@ -1160,6 +1178,8 @@ TEST_P(IntegerIndexTest, InitializeNewFiles) {
IntegerIndex::kInfoMetadataFileOffset));
EXPECT_THAT(info.magic, Eq(Info::kMagic));
EXPECT_THAT(info.last_added_document_id, Eq(kInvalidDocumentId));
+ EXPECT_THAT(info.num_data_threshold_for_bucket_split,
+ Eq(GetParam().num_data_threshold_for_bucket_split));
// Check crcs section
Crcs crcs;
@@ -1183,7 +1203,8 @@ TEST_P(IntegerIndexTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
// Insert some data.
Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
@@ -1195,16 +1216,19 @@ TEST_P(IntegerIndexTest,
// Without calling PersistToDisk, checksums will not be recomputed or synced
// to disk, so initializing another instance on the same files should fail.
- EXPECT_THAT(IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_P(IntegerIndexTest, InitializationShouldSucceedWithPersistToDisk) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index1,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
// Insert some data.
Index(integer_index1.get(), kDefaultTestPropertyPath, /*document_id=*/0,
@@ -1228,7 +1252,8 @@ TEST_P(IntegerIndexTest, InitializationShouldSucceedWithPersistToDisk) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index2,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
EXPECT_THAT(integer_index2->last_added_document_id(), Eq(2));
EXPECT_THAT(Query(integer_index2.get(), kDefaultTestPropertyPath,
/*key_lower=*/std::numeric_limits<int64_t>::min(),
@@ -1243,7 +1268,8 @@ TEST_P(IntegerIndexTest, InitializationShouldSucceedAfterDestruction) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
// Insert some data.
Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
@@ -1268,7 +1294,8 @@ TEST_P(IntegerIndexTest, InitializationShouldSucceedAfterDestruction) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
EXPECT_THAT(integer_index->last_added_document_id(), Eq(2));
EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath,
/*key_lower=*/std::numeric_limits<int64_t>::min(),
@@ -1283,7 +1310,8 @@ TEST_P(IntegerIndexTest, InitializeExistingFilesWithWrongAllCrcShouldFail) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
// Insert some data.
Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
/*section_id=*/20, /*keys=*/{0, 100, -100});
@@ -1315,8 +1343,10 @@ TEST_P(IntegerIndexTest, InitializeExistingFilesWithWrongAllCrcShouldFail) {
// Attempt to create the integer index with metadata containing corrupted
// all_crc. This should fail.
libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
- integer_index_or = IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam());
+ integer_index_or =
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv);
EXPECT_THAT(integer_index_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(integer_index_or.status().error_message(),
@@ -1329,7 +1359,8 @@ TEST_P(IntegerIndexTest, InitializeExistingFilesWithCorruptedInfoShouldFail) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
// Insert some data.
Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
/*section_id=*/20, /*keys=*/{0, 100, -100});
@@ -1362,8 +1393,10 @@ TEST_P(IntegerIndexTest, InitializeExistingFilesWithCorruptedInfoShouldFail) {
// Attempt to create the integer index with info that doesn't match its
// checksum and confirm that it fails.
libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
- integer_index_or = IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam());
+ integer_index_or =
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv);
EXPECT_THAT(integer_index_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(integer_index_or.status().error_message(),
@@ -1377,7 +1410,8 @@ TEST_P(IntegerIndexTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
// Insert some data.
Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
/*section_id=*/20, /*keys=*/{0, 100, -100});
@@ -1400,7 +1434,9 @@ TEST_P(IntegerIndexTest,
std::unique_ptr<IntegerIndexStorage> storage,
IntegerIndexStorage::Create(
filesystem_, std::move(storage_working_path),
- IntegerIndexStorage::Options(/*pre_mapping_fbv=*/GetParam()),
+ IntegerIndexStorage::Options(
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv),
&posting_list_integer_index_serializer));
ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, /*section_id=*/4,
/*new_keys=*/{3, 4, 5}));
@@ -1412,8 +1448,10 @@ TEST_P(IntegerIndexTest,
// Attempt to create the integer index with corrupted storages. This should
// fail.
libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
- integer_index_or = IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam());
+ integer_index_or =
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv);
EXPECT_THAT(integer_index_or,
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(integer_index_or.status().error_message(),
@@ -1421,6 +1459,41 @@ TEST_P(IntegerIndexTest,
}
}
+TEST_P(
+ IntegerIndexTest,
+ InitializeExistingFilesWithMismatchNumDataThresholdForBucketSplitShouldFail) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndex> integer_index,
+ IntegerIndex::Create(filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
+ // Insert some data.
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0,
+ /*section_id=*/20, /*keys=*/{0, 100, -100});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/1,
+ /*section_id=*/2, /*keys=*/{3, -1000, 500});
+ Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/2,
+ /*section_id=*/15, /*keys=*/{-6, 321, 98});
+
+ ICING_ASSERT_OK(integer_index->PersistToDisk());
+ }
+
+ {
+ // Attempt to create the integer index with different
+ // num_data_threshold_for_bucket_split. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
+ integer_index_or = IntegerIndex::Create(
+ filesystem_, working_path_,
+ GetParam().num_data_threshold_for_bucket_split + 1,
+ GetParam().pre_mapping_fbv);
+ EXPECT_THAT(integer_index_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(integer_index_or.status().error_message(),
+ HasSubstr("Mismatch num_data_threshold_for_bucket_split"));
+ }
+}
+
TEST_P(IntegerIndexTest, WildcardStoragePersistenceQuery) {
// This test sets its schema assuming that max property storages == 32.
ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32));
@@ -1586,7 +1659,8 @@ TEST_P(IntegerIndexTest, WildcardStoragePersistenceQuery) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
// Index numeric content for other properties to force our property into the
// wildcard storage.
@@ -1651,7 +1725,8 @@ TEST_P(IntegerIndexTest, WildcardStoragePersistenceQuery) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
EXPECT_THAT(integer_index->num_property_indices(), Eq(33));
@@ -1691,7 +1766,8 @@ TEST_P(IntegerIndexTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
// Doc id = 1: insert 2 data for "prop1", "prop2"
Index(integer_index.get(), kPropertyPath2, /*document_id=*/1, kSectionId2,
@@ -1742,7 +1818,8 @@ TEST_P(IntegerIndexTest,
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
// Key = 1
EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/1,
@@ -1968,7 +2045,8 @@ TEST_P(IntegerIndexTest, WildcardStorageWorksAfterOptimize) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
// Index numeric content for other properties to force our property into the
// wildcard storage.
@@ -2067,7 +2145,8 @@ TEST_P(IntegerIndexTest, WildcardStorageWorksAfterOptimize) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
EXPECT_THAT(integer_index->num_property_indices(), Eq(33));
@@ -2236,7 +2315,8 @@ TEST_P(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
// Index numeric content for other properties to force our property into the
// wildcard storage.
@@ -2317,7 +2397,8 @@ TEST_P(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<IntegerIndex> integer_index,
IntegerIndex::Create(filesystem_, working_path_,
- /*pre_mapping_fbv=*/GetParam()));
+ GetParam().num_data_threshold_for_bucket_split,
+ GetParam().pre_mapping_fbv));
EXPECT_THAT(integer_index->num_property_indices(), Eq(1));
@@ -2363,8 +2444,20 @@ TEST_P(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) {
/*document_id=*/7, expected_sections_typea))));
}
-INSTANTIATE_TEST_SUITE_P(IntegerIndexTest, IntegerIndexTest,
- testing::Values(true, false));
+INSTANTIATE_TEST_SUITE_P(
+ IntegerIndexTest, IntegerIndexTest,
+ testing::Values(
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/341,
+ /*pre_mapping_fbv_in=*/false),
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/341,
+ /*pre_mapping_fbv_in=*/true),
+
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/16384,
+ /*pre_mapping_fbv_in=*/false),
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/32768,
+ /*pre_mapping_fbv_in=*/false),
+ IntegerIndexTestParam(/*num_data_threshold_for_bucket_split_in=*/65536,
+ /*pre_mapping_fbv_in=*/false)));
} // namespace
diff --git a/icing/index/numeric/numeric-index.h b/icing/index/numeric/numeric-index.h
index 24b81e7..57911de 100644
--- a/icing/index/numeric/numeric-index.h
+++ b/icing/index/numeric/numeric-index.h
@@ -177,15 +177,17 @@ class NumericIndex : public PersistentStorage {
: PersistentStorage(filesystem, std::move(working_path),
working_path_type) {}
- virtual libtextclassifier3::Status PersistStoragesToDisk() override = 0;
+ virtual libtextclassifier3::Status PersistStoragesToDisk(
+ bool force) override = 0;
- virtual libtextclassifier3::Status PersistMetadataToDisk() override = 0;
+ virtual libtextclassifier3::Status PersistMetadataToDisk(
+ bool force) override = 0;
- virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum()
- override = 0;
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(
+ bool force) override = 0;
- virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum()
- override = 0;
+ virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override = 0;
virtual Crcs& crcs() override = 0;
virtual const Crcs& crcs() const override = 0;
diff --git a/icing/index/numeric/posting-list-integer-index-accessor.h b/icing/index/numeric/posting-list-integer-index-accessor.h
index f0d3d25..4f667a0 100644
--- a/icing/index/numeric/posting-list-integer-index-accessor.h
+++ b/icing/index/numeric/posting-list-integer-index-accessor.h
@@ -100,16 +100,6 @@ class PostingListIntegerIndexAccessor : public PostingListAccessor {
// posting list.
libtextclassifier3::Status PrependData(const IntegerIndexData& data);
- bool WantsSplit() const {
- const PostingListUsed* current_pl =
- preexisting_posting_list_ != nullptr
- ? &preexisting_posting_list_->posting_list
- : &in_memory_posting_list_;
- // Only max-sized PLs get split. Smaller PLs just get copied to larger PLs.
- return current_pl->size_in_bytes() == storage_->max_posting_list_bytes() &&
- serializer_->IsFull(current_pl);
- }
-
private:
explicit PostingListIntegerIndexAccessor(
FlashIndexStorage* storage, PostingListUsed in_memory_posting_list,
diff --git a/icing/join/join-processor_test.cc b/icing/join/join-processor_test.cc
index 3dd7c87..f503442 100644
--- a/icing/join/join-processor_test.cc
+++ b/icing/join/join-processor_test.cc
@@ -118,13 +118,14 @@ class JoinProcessorTest : public ::testing::Test {
IsTrue());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
doc_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/join/qualified-id-join-index.cc b/icing/join/qualified-id-join-index.cc
index 86f1921..07b5627 100644
--- a/icing/join/qualified-id-join-index.cc
+++ b/icing/join/qualified-id-join-index.cc
@@ -103,6 +103,8 @@ QualifiedIdJoinIndex::~QualifiedIdJoinIndex() {
libtextclassifier3::Status QualifiedIdJoinIndex::Put(
const DocJoinInfo& doc_join_info, std::string_view ref_qualified_id_str) {
+ SetDirty();
+
if (!doc_join_info.is_valid()) {
return absl_ports::InvalidArgumentError(
"Cannot put data for an invalid DocJoinInfo");
@@ -215,6 +217,8 @@ libtextclassifier3::Status QualifiedIdJoinIndex::Optimize(
}
libtextclassifier3::Status QualifiedIdJoinIndex::Clear() {
+ SetDirty();
+
doc_join_info_mapper_.reset();
// Discard and reinitialize doc join info mapper.
std::string doc_join_info_mapper_path =
@@ -400,7 +404,12 @@ libtextclassifier3::Status QualifiedIdJoinIndex::TransferIndex(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status QualifiedIdJoinIndex::PersistMetadataToDisk() {
+libtextclassifier3::Status QualifiedIdJoinIndex::PersistMetadataToDisk(
+ bool force) {
+ if (!force && !is_info_dirty() && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
std::string metadata_file_path = GetMetadataFilePath(working_path_);
ScopedFd sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
@@ -420,19 +429,32 @@ libtextclassifier3::Status QualifiedIdJoinIndex::PersistMetadataToDisk() {
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status QualifiedIdJoinIndex::PersistStoragesToDisk() {
+libtextclassifier3::Status QualifiedIdJoinIndex::PersistStoragesToDisk(
+ bool force) {
+ if (!force && !is_storage_dirty()) {
+ return libtextclassifier3::Status::OK;
+ }
+
ICING_RETURN_IF_ERROR(doc_join_info_mapper_->PersistToDisk());
ICING_RETURN_IF_ERROR(qualified_id_storage_->PersistToDisk());
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<Crc32>
-QualifiedIdJoinIndex::ComputeInfoChecksum() {
+libtextclassifier3::StatusOr<Crc32> QualifiedIdJoinIndex::ComputeInfoChecksum(
+ bool force) {
+ if (!force && !is_info_dirty()) {
+ return Crc32(crcs().component_crcs.info_crc);
+ }
+
return info().ComputeChecksum();
}
libtextclassifier3::StatusOr<Crc32>
-QualifiedIdJoinIndex::ComputeStoragesChecksum() {
+QualifiedIdJoinIndex::ComputeStoragesChecksum(bool force) {
+ if (!force && !is_storage_dirty()) {
+ return Crc32(crcs().component_crcs.storages_crc);
+ }
+
ICING_ASSIGN_OR_RETURN(Crc32 doc_join_info_mapper_crc,
doc_join_info_mapper_->ComputeChecksum());
ICING_ASSIGN_OR_RETURN(Crc32 qualified_id_storage_crc,
diff --git a/icing/join/qualified-id-join-index.h b/icing/join/qualified-id-join-index.h
index 6accfaf..86297cd 100644
--- a/icing/join/qualified-id-join-index.h
+++ b/icing/join/qualified-id-join-index.h
@@ -172,6 +172,8 @@ class QualifiedIdJoinIndex : public PersistentStorage {
}
void set_last_added_document_id(DocumentId document_id) {
+ SetInfoDirty();
+
Info& info_ref = info();
if (info_ref.last_added_document_id == kInvalidDocumentId ||
document_id > info_ref.last_added_document_id) {
@@ -192,7 +194,9 @@ class QualifiedIdJoinIndex : public PersistentStorage {
doc_join_info_mapper_(std::move(doc_join_info_mapper)),
qualified_id_storage_(std::move(qualified_id_storage)),
pre_mapping_fbv_(pre_mapping_fbv),
- use_persistent_hash_map_(use_persistent_hash_map) {}
+ use_persistent_hash_map_(use_persistent_hash_map),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
static libtextclassifier3::StatusOr<std::unique_ptr<QualifiedIdJoinIndex>>
InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
@@ -219,27 +223,28 @@ class QualifiedIdJoinIndex : public PersistentStorage {
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistMetadataToDisk() override;
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
// Flushes contents of all storages to underlying files.
//
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistStoragesToDisk() override;
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
// Computes and returns Info checksum.
//
// Returns:
// - Crc of the Info on success
- libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override;
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
// Computes and returns all storages checksum.
//
// Returns:
// - Crc of all storages on success
// - INTERNAL_ERROR if any data inconsistency
- libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() override;
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
Crcs& crcs() override {
return *reinterpret_cast<Crcs*>(metadata_buffer_.get() +
@@ -261,6 +266,17 @@ class QualifiedIdJoinIndex : public PersistentStorage {
kInfoMetadataBufferOffset);
}
+ void SetInfoDirty() { is_info_dirty_ = true; }
+ // When storage is dirty, we have to set info dirty as well. So just expose
+ // SetDirty to set both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
// Metadata buffer
std::unique_ptr<uint8_t[]> metadata_buffer_;
@@ -281,6 +297,9 @@ class QualifiedIdJoinIndex : public PersistentStorage {
// Flag indicating whether use persistent hash map as the key mapper (if
// false, then fall back to dynamic trie key mapper).
bool use_persistent_hash_map_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
};
} // namespace lib
diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc
index 0d7ba6d..d118691 100644
--- a/icing/query/advanced_query_parser/query-visitor_test.cc
+++ b/icing/query/advanced_query_parser/query-visitor_test.cc
@@ -114,13 +114,14 @@ class QueryVisitorTest : public ::testing::TestWithParam<QueryType> {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, store_dir_, &clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, store_dir_, &clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
Index::Options options(index_dir_.c_str(),
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
index 89f3b54..626f352 100644
--- a/icing/query/query-processor_benchmark.cc
+++ b/icing/query/query-processor_benchmark.cc
@@ -98,7 +98,8 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
return DocumentStore::Create(
filesystem, base_dir, clock, schema_store,
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
/*initialize_stats=*/nullptr);
}
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index 3d3cf48..5bbf9ba 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -69,7 +69,8 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
return DocumentStore::Create(
filesystem, base_dir, clock, schema_store,
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
/*initialize_stats=*/nullptr);
}
diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc
index b1336b3..49f8d43 100644
--- a/icing/query/suggestion-processor_test.cc
+++ b/icing/query/suggestion-processor_test.cc
@@ -85,13 +85,14 @@ class SuggestionProcessorTest : public Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
Index::Options options(index_dir_,
diff --git a/icing/result/result-retriever-v2_group-result-limiter_test.cc b/icing/result/result-retriever-v2_group-result-limiter_test.cc
index 5d8b589..2914a8d 100644
--- a/icing/result/result-retriever-v2_group-result-limiter_test.cc
+++ b/icing/result/result-retriever-v2_group-result-limiter_test.cc
@@ -89,13 +89,14 @@ class ResultRetrieverV2GroupResultLimiterTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
}
diff --git a/icing/result/result-retriever-v2_projection_test.cc b/icing/result/result-retriever-v2_projection_test.cc
index 6b868a5..1a75631 100644
--- a/icing/result/result-retriever-v2_projection_test.cc
+++ b/icing/result/result-retriever-v2_projection_test.cc
@@ -184,13 +184,14 @@ class ResultRetrieverV2ProjectionTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
}
diff --git a/icing/result/result-retriever-v2_snippet_test.cc b/icing/result/result-retriever-v2_snippet_test.cc
index 27f16a0..440d31c 100644
--- a/icing/result/result-retriever-v2_snippet_test.cc
+++ b/icing/result/result-retriever-v2_snippet_test.cc
@@ -109,13 +109,14 @@ class ResultRetrieverV2SnippetTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, test_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
}
diff --git a/icing/result/result-retriever-v2_test.cc b/icing/result/result-retriever-v2_test.cc
index 889dc60..0bd40cc 100644
--- a/icing/result/result-retriever-v2_test.cc
+++ b/icing/result/result-retriever-v2_test.cc
@@ -220,7 +220,8 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
return DocumentStore::Create(
filesystem, base_dir, clock, schema_store,
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
/*initialize_stats=*/nullptr);
}
diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc
index 38d67e8..75d1d93 100644
--- a/icing/result/result-state-manager_test.cc
+++ b/icing/result/result-state-manager_test.cc
@@ -107,13 +107,14 @@ class ResultStateManagerTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult result,
- DocumentStore::Create(&filesystem_, test_dir_, clock_.get(),
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, test_dir_, clock_.get(), schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/result/result-state-manager_thread-safety_test.cc b/icing/result/result-state-manager_thread-safety_test.cc
index 53745e6..7e7e13c 100644
--- a/icing/result/result-state-manager_thread-safety_test.cc
+++ b/icing/result/result-state-manager_thread-safety_test.cc
@@ -100,13 +100,14 @@ class ResultStateManagerThreadSafetyTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult result,
- DocumentStore::Create(&filesystem_, test_dir_, clock_.get(),
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, test_dir_, clock_.get(), schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/result/result-state-v2_test.cc b/icing/result/result-state-v2_test.cc
index ab29d6e..0f88023 100644
--- a/icing/result/result-state-v2_test.cc
+++ b/icing/result/result-state-v2_test.cc
@@ -76,13 +76,14 @@ class ResultStateV2Test : public ::testing::Test {
filesystem_.CreateDirectoryRecursively(doc_store_base_dir_.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult result,
- DocumentStore::Create(&filesystem_, doc_store_base_dir_, &clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, doc_store_base_dir_, &clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(result.document_store);
num_total_hits_ = 0;
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index bcc7c2c..bcacdce 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -485,9 +485,13 @@ libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles(
std::make_unique<SchemaProto>(std::move(base_schema));
ICING_RETURN_IF_ERROR(schema_file_->Write(std::move(base_schema_ptr)));
+ // LINT.IfChange(min_overlay_version_compatibility)
+ // Although the current version is 2, the schema is compatible with
+ // version 1, so min_overlay_version_compatibility should be 1.
+ int32_t min_overlay_version_compatibility = version_util::kVersionOne;
+ // LINT.ThenChange(//depot/google3/icing/file/version-util.h:kVersion)
header_->SetOverlayInfo(
- /*overlay_created=*/true,
- /*min_overlay_version_compatibility=*/version_util::kVersionOne);
+ /*overlay_created=*/true, min_overlay_version_compatibility);
// Rebuild in memory data - references to the old schema will be invalid
// now.
BuildInMemoryCache();
diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc
index 3298b75..8fc51e7 100644
--- a/icing/schema/schema-store_test.cc
+++ b/icing/schema/schema-store_test.cc
@@ -2722,7 +2722,8 @@ TEST_F(SchemaStoreTest, MigrateSchemaVersionZeroUpgradeNoChange) {
}
}
-TEST_F(SchemaStoreTest, MigrateSchemaRollbackDiscardsOverlaySchema) {
+TEST_F(SchemaStoreTest,
+ MigrateSchemaRollbackDiscardsIncompatibleOverlaySchema) {
// Because we are upgrading from version zero, the schema must be compatible
// with version zero.
SchemaTypeConfigProto type_a =
@@ -2749,12 +2750,12 @@ TEST_F(SchemaStoreTest, MigrateSchemaRollbackDiscardsOverlaySchema) {
IsOkAndHolds(Pointee(EqualsProto(schema))));
}
- // Rollback to a version before kVersion. The schema header will declare that
- // the overlay is compatible with any version starting with kVersion. So
- // kVersion - 1 is incompatible and will throw out the schema.
+ // Rollback to a version before kVersionOne. The schema header will declare
+ // that the overlay is compatible with any version starting with kVersionOne.
+ // So kVersionOne - 1 is incompatible and will throw out the schema.
ICING_EXPECT_OK(SchemaStore::MigrateSchema(
&filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
- version_util::kVersion - 1));
+ version_util::kVersionOne - 1));
{
// Create a new of the schema store and check that we fell back to the
@@ -2777,7 +2778,7 @@ TEST_F(SchemaStoreTest, MigrateSchemaRollbackDiscardsOverlaySchema) {
}
}
-TEST_F(SchemaStoreTest, MigrateSchemaCompatibleRollbackKeepsOverlaySchema) {
+TEST_F(SchemaStoreTest, MigrateSchemaRollbackKeepsCompatibleOverlaySchema) {
// Because we are upgrading from version zero, the schema must be compatible
// with version zero.
SchemaTypeConfigProto type_a =
@@ -2846,12 +2847,12 @@ TEST_F(SchemaStoreTest, MigrateSchemaRollforwardRetainsBaseSchema) {
IsOkAndHolds(Pointee(EqualsProto(schema))));
}
- // Rollback to a version before kVersion. The schema header will declare that
- // the overlay is compatible with any version starting with kVersion. So
- // kVersion - 1 is incompatible and will throw out the schema.
+ // Rollback to a version before kVersionOne. The schema header will declare
+ // that the overlay is compatible with any version starting with kVersionOne.
+ // So kVersionOne - 1 is incompatible and will throw out the schema.
ICING_EXPECT_OK(SchemaStore::MigrateSchema(
&filesystem_, schema_store_dir_, version_util::StateChange::kRollBack,
- version_util::kVersion - 1));
+ version_util::kVersionOne - 1));
SchemaTypeConfigProto other_type_a =
SchemaTypeConfigBuilder()
diff --git a/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc b/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc
index bdafa28..3612359 100644
--- a/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc
+++ b/icing/scoring/advanced_scoring/advanced-scorer_fuzz_test.cc
@@ -37,13 +37,13 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
SchemaStore::Create(&filesystem, schema_store_dir, &fake_clock)
.ValueOrDie();
std::unique_ptr<DocumentStore> document_store =
- DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
- schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr)
+ DocumentStore::Create(
+ &filesystem, doc_store_dir, &fake_clock, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr)
.ValueOrDie()
.document_store;
diff --git a/icing/scoring/advanced_scoring/advanced-scorer_test.cc b/icing/scoring/advanced_scoring/advanced-scorer_test.cc
index 0ecc21d..cc1d413 100644
--- a/icing/scoring/advanced_scoring/advanced-scorer_test.cc
+++ b/icing/scoring/advanced_scoring/advanced-scorer_test.cc
@@ -64,13 +64,14 @@ class AdvancedScorerTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc
index abb019f..7cb5a95 100644
--- a/icing/scoring/score-and-rank_benchmark.cc
+++ b/icing/scoring/score-and-rank_benchmark.cc
@@ -95,7 +95,8 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
return DocumentStore::Create(
filesystem, base_dir, clock, schema_store,
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
/*initialize_stats=*/nullptr);
}
diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc
index 4a97a87..5194c7f 100644
--- a/icing/scoring/scorer_test.cc
+++ b/icing/scoring/scorer_test.cc
@@ -64,13 +64,14 @@ class ScorerTest : public ::testing::TestWithParam<ScorerTestingMode> {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock1_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock1_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc
index 644e013..deddff8 100644
--- a/icing/scoring/scoring-processor_test.cc
+++ b/icing/scoring/scoring-processor_test.cc
@@ -62,13 +62,14 @@ class ScoringProcessorTest
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, doc_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index e99bacf..30de410 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -54,6 +54,7 @@
#include "icing/store/document-log-creator.h"
#include "icing/store/dynamic-trie-key-mapper.h"
#include "icing/store/namespace-id.h"
+#include "icing/store/persistent-hash-map-key-mapper.h"
#include "icing/store/usage-store.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/util/clock.h"
@@ -73,6 +74,7 @@ namespace {
// Used in DocumentId mapper to mark a document as deleted
constexpr int64_t kDocDeletedFlag = -1;
constexpr char kDocumentIdMapperFilename[] = "document_id_mapper";
+constexpr char kUriHashMapperWorkingPath[] = "uri_mapper";
constexpr char kDocumentStoreHeaderFilename[] = "document_store_header";
constexpr char kScoreCacheFilename[] = "score_cache";
constexpr char kCorpusScoreCache[] = "corpus_score_cache";
@@ -81,9 +83,17 @@ constexpr char kNamespaceMapperFilename[] = "namespace_mapper";
constexpr char kUsageStoreDirectoryName[] = "usage_store";
constexpr char kCorpusIdMapperFilename[] = "corpus_mapper";
-// Determined through manual testing to allow for 1 million uris. 1 million
-// because we allow up to 1 million DocumentIds.
-constexpr int32_t kUriMapperMaxSize = 36 * 1024 * 1024; // 36 MiB
+// Determined through manual testing to allow for 4 million uris. 4 million
+// because we allow up to 4 million DocumentIds.
+constexpr int32_t kUriDynamicTrieKeyMapperMaxSize =
+ 144 * 1024 * 1024; // 144 MiB
+
+constexpr int32_t kUriHashKeyMapperMaxNumEntries =
+ kMaxDocumentId + 1; // 1 << 22, 4M
+// - Key: namespace_id_str (3 bytes) + fingerprinted_uri (10 bytes) + '\0' (1
+// byte)
+// - Value: DocumentId (4 bytes)
+constexpr int32_t kUriHashKeyMapperKVByteSize = 13 + 1 + sizeof(DocumentId);
// 384 KiB for a DynamicTrieKeyMapper would allow each internal array to have a
// max of 128 KiB for storage.
@@ -100,6 +110,10 @@ std::string MakeHeaderFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kDocumentStoreHeaderFilename);
}
+std::string MakeUriHashMapperWorkingPath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kUriHashMapperWorkingPath);
+}
+
std::string MakeDocumentIdMapperFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kDocumentIdMapperFilename);
}
@@ -207,6 +221,41 @@ std::unordered_map<NamespaceId, std::string> GetNamespaceIdsToNamespaces(
return namespace_ids_to_namespaces;
}
+libtextclassifier3::StatusOr<std::unique_ptr<
+ KeyMapper<DocumentId, fingerprint_util::FingerprintStringFormatter>>>
+CreateUriMapper(const Filesystem& filesystem, const std::string& base_dir,
+ bool pre_mapping_fbv, bool use_persistent_hash_map) {
+ std::string uri_hash_mapper_working_path =
+ MakeUriHashMapperWorkingPath(base_dir);
+ // Due to historic issue, we use document store's base_dir directly as
+ // DynamicTrieKeyMapper's working directory for uri mapper.
+ // DynamicTrieKeyMapper also creates a subdirectory "key_mapper_dir", so the
+ // actual files will be put under "<base_dir>/key_mapper_dir/".
+ bool dynamic_trie_key_mapper_dir_exists = filesystem.DirectoryExists(
+ absl_ports::StrCat(base_dir, "/key_mapper_dir").c_str());
+ bool persistent_hash_map_dir_exists =
+ filesystem.DirectoryExists(uri_hash_mapper_working_path.c_str());
+ if ((use_persistent_hash_map && dynamic_trie_key_mapper_dir_exists) ||
+ (!use_persistent_hash_map && persistent_hash_map_dir_exists)) {
+ // Return a failure here so that the caller can properly delete and rebuild
+ // this component.
+ return absl_ports::FailedPreconditionError("Key mapper type mismatch");
+ }
+
+ if (use_persistent_hash_map) {
+ return PersistentHashMapKeyMapper<
+ DocumentId, fingerprint_util::FingerprintStringFormatter>::
+ Create(filesystem, std::move(uri_hash_mapper_working_path),
+ pre_mapping_fbv,
+ /*max_num_entries=*/kUriHashKeyMapperMaxNumEntries,
+ /*average_kv_byte_size=*/kUriHashKeyMapperKVByteSize);
+ } else {
+ return DynamicTrieKeyMapper<DocumentId,
+ fingerprint_util::FingerprintStringFormatter>::
+ Create(filesystem, base_dir, kUriDynamicTrieKeyMapperMaxSize);
+ }
+}
+
} // namespace
std::string DocumentStore::MakeFingerprint(
@@ -231,6 +280,7 @@ DocumentStore::DocumentStore(const Filesystem* filesystem,
const Clock* clock,
const SchemaStore* schema_store,
bool namespace_id_fingerprint,
+ bool pre_mapping_fbv, bool use_persistent_hash_map,
int32_t compression_level)
: filesystem_(filesystem),
base_dir_(base_dir),
@@ -238,6 +288,8 @@ DocumentStore::DocumentStore(const Filesystem* filesystem,
schema_store_(schema_store),
document_validator_(schema_store),
namespace_id_fingerprint_(namespace_id_fingerprint),
+ pre_mapping_fbv_(pre_mapping_fbv),
+ use_persistent_hash_map_(use_persistent_hash_map),
compression_level_(compression_level) {}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
@@ -266,6 +318,7 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
bool force_recovery_and_revalidate_documents, bool namespace_id_fingerprint,
+ bool pre_mapping_fbv, bool use_persistent_hash_map,
int32_t compression_level, InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
@@ -273,7 +326,7 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
auto document_store = std::unique_ptr<DocumentStore>(new DocumentStore(
filesystem, base_dir, clock, schema_store, namespace_id_fingerprint,
- compression_level));
+ pre_mapping_fbv, use_persistent_hash_map, compression_level));
ICING_ASSIGN_OR_RETURN(
DataLoss data_loss,
document_store->Initialize(force_recovery_and_revalidate_documents,
@@ -293,9 +346,12 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
return absl_ports::InternalError("Couldn't delete header file");
}
- // Document key mapper
+ // Document key mapper. Doesn't hurt to delete both dynamic trie and
+ // persistent hash map without checking.
ICING_RETURN_IF_ERROR(
DynamicTrieKeyMapper<DocumentId>::Delete(*filesystem, base_dir));
+ ICING_RETURN_IF_ERROR(PersistentHashMapKeyMapper<DocumentId>::Delete(
+ *filesystem, MakeUriHashMapperWorkingPath(base_dir)));
// Document id mapper
ICING_RETURN_IF_ERROR(FileBackedVector<int64_t>::Delete(
@@ -429,11 +485,8 @@ libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() {
// TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
- auto document_key_mapper_or = DynamicTrieKeyMapper<
- DocumentId,
- fingerprint_util::FingerprintStringFormatter>::Create(*filesystem_,
- base_dir_,
- kUriMapperMaxSize);
+ auto document_key_mapper_or = CreateUriMapper(
+ *filesystem_, base_dir_, pre_mapping_fbv_, use_persistent_hash_map_);
if (!document_key_mapper_or.ok()) {
ICING_LOG(ERROR) << document_key_mapper_or.status().error_message()
<< "Failed to initialize KeyMapper";
@@ -646,6 +699,10 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles(
}
libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() {
+ // Only one type of KeyMapper (either DynamicTrieKeyMapper or
+ // PersistentHashMapKeyMapper) will actually exist at any moment, but it is ok
+ // to call Delete() for both since Delete() returns OK if any of them doesn't
+ // exist.
// TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
document_key_mapper_.reset();
// TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
@@ -654,17 +711,21 @@ libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() {
DynamicTrieKeyMapper<DocumentId>::Delete(*filesystem_, base_dir_);
if (!status.ok()) {
ICING_LOG(ERROR) << status.error_message()
- << "Failed to delete old key mapper";
+ << "Failed to delete old dynamic trie key mapper";
+ return status;
+ }
+ status = PersistentHashMapKeyMapper<DocumentId>::Delete(
+ *filesystem_, MakeUriHashMapperWorkingPath(base_dir_));
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << status.error_message()
+ << "Failed to delete old persistent hash map key mapper";
return status;
}
// TODO(b/216487496): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
- auto document_key_mapper_or = DynamicTrieKeyMapper<
- DocumentId,
- fingerprint_util::FingerprintStringFormatter>::Create(*filesystem_,
- base_dir_,
- kUriMapperMaxSize);
+ auto document_key_mapper_or = CreateUriMapper(
+ *filesystem_, base_dir_, pre_mapping_fbv_, use_persistent_hash_map_);
if (!document_key_mapper_or.ok()) {
ICING_LOG(ERROR) << document_key_mapper_or.status().error_message()
<< "Failed to re-init key mapper";
@@ -1771,7 +1832,6 @@ libtextclassifier3::Status DocumentStore::Optimize() {
libtextclassifier3::StatusOr<std::vector<DocumentId>>
DocumentStore::OptimizeInto(const std::string& new_directory,
const LanguageSegmenter* lang_segmenter,
- bool namespace_id_fingerprint,
OptimizeStatsProto* stats) {
// Validates directory
if (new_directory == base_dir_) {
@@ -1783,7 +1843,8 @@ DocumentStore::OptimizeInto(const std::string& new_directory,
auto doc_store_create_result,
DocumentStore::Create(filesystem_, new_directory, &clock_, schema_store_,
/*force_recovery_and_revalidate_documents=*/false,
- namespace_id_fingerprint, compression_level_,
+ namespace_id_fingerprint_, pre_mapping_fbv_,
+ use_persistent_hash_map_, compression_level_,
/*initialize_stats=*/nullptr));
std::unique_ptr<DocumentStore> new_doc_store =
std::move(doc_store_create_result.document_store);
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 3941f6d..92d4286 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -142,8 +142,8 @@ class DocumentStore {
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
bool force_recovery_and_revalidate_documents,
- bool namespace_id_fingerprint,
- int32_t compression_level,
+ bool namespace_id_fingerprint, bool pre_mapping_fbv,
+ bool use_persistent_hash_map, int32_t compression_level,
InitializeStatsProto* initialize_stats);
// Discards all derived data in the document store.
@@ -456,7 +456,7 @@ class DocumentStore {
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<std::vector<DocumentId>> OptimizeInto(
const std::string& new_directory, const LanguageSegmenter* lang_segmenter,
- bool namespace_id_fingerprint, OptimizeStatsProto* stats = nullptr);
+ OptimizeStatsProto* stats = nullptr);
// Calculates status for a potential Optimize call. Includes how many docs
// there are vs how many would be optimized away. And also includes an
@@ -488,9 +488,12 @@ class DocumentStore {
private:
// Use DocumentStore::Create() to instantiate.
- DocumentStore(const Filesystem* filesystem, std::string_view base_dir,
- const Clock* clock, const SchemaStore* schema_store,
- bool namespace_id_fingerprint, int32_t compression_level);
+ explicit DocumentStore(const Filesystem* filesystem,
+ std::string_view base_dir, const Clock* clock,
+ const SchemaStore* schema_store,
+ bool namespace_id_fingerprint, bool pre_mapping_fbv,
+ bool use_persistent_hash_map,
+ int32_t compression_level);
const Filesystem* const filesystem_;
const std::string base_dir_;
@@ -507,6 +510,15 @@ class DocumentStore {
// document_key_mapper_ and corpus_mapper_.
bool namespace_id_fingerprint_;
+ // Flag indicating whether memory map max possible file size for underlying
+ // FileBackedVector before growing the actual file size.
+ bool pre_mapping_fbv_;
+
+ // Flag indicating whether use persistent hash map as the key mapper (if
+ // false, then fall back to dynamic trie key mapper). Note: we only use
+ // persistent hash map for uri mapper if it is true.
+ bool use_persistent_hash_map_;
+
const int32_t compression_level_;
// A log used to store all documents, it serves as a ground truth of doc
diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc
index 75995e9..5b9c568 100644
--- a/icing/store/document-store_benchmark.cc
+++ b/icing/store/document-store_benchmark.cc
@@ -132,7 +132,8 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
return DocumentStore::Create(
filesystem, base_dir, clock, schema_store,
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
+ /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
+ /*use_persistent_hash_map=*/false,
PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
/*initialize_stats=*/nullptr);
}
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index 9a1f4a6..a9c47f0 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -71,6 +71,7 @@ using ::testing::Ge;
using ::testing::Gt;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
+using ::testing::IsFalse;
using ::testing::IsTrue;
using ::testing::Not;
using ::testing::Return;
@@ -120,7 +121,21 @@ void WriteDocumentLogHeader(
sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
}
-class DocumentStoreTest : public ::testing::Test {
+struct DocumentStoreTestParam {
+ bool namespace_id_fingerprint;
+ bool pre_mapping_fbv;
+ bool use_persistent_hash_map;
+
+ explicit DocumentStoreTestParam(bool namespace_id_fingerprint_in,
+ bool pre_mapping_fbv_in,
+ bool use_persistent_hash_map_in)
+ : namespace_id_fingerprint(namespace_id_fingerprint_in),
+ pre_mapping_fbv(pre_mapping_fbv_in),
+ use_persistent_hash_map(use_persistent_hash_map_in) {}
+};
+
+class DocumentStoreTest
+ : public ::testing::TestWithParam<DocumentStoreTestParam> {
protected:
DocumentStoreTest()
: test_dir_(GetTestTempDir() + "/icing"),
@@ -213,7 +228,7 @@ class DocumentStoreTest : public ::testing::Test {
absl_ports::StrCat(document_store_dir_, "/document_store_header");
DocumentStore::Header header;
header.magic = DocumentStore::Header::GetCurrentMagic(
- /*namespace_id_fingerprint=*/false);
+ GetParam().namespace_id_fingerprint);
header.checksum = 10; // Arbitrary garbage checksum
filesystem_.DeleteFile(header_file.c_str());
filesystem_.Write(header_file.c_str(), &header, sizeof(header));
@@ -225,7 +240,8 @@ class DocumentStoreTest : public ::testing::Test {
return DocumentStore::Create(
filesystem, base_dir, clock, schema_store,
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
/*initialize_stats=*/nullptr);
}
@@ -254,7 +270,7 @@ class DocumentStoreTest : public ::testing::Test {
const int64_t document2_expiration_timestamp_ = 3; // creation + ttl
};
-TEST_F(DocumentStoreTest, CreationWithNullPointerShouldFail) {
+TEST_P(DocumentStoreTest, CreationWithNullPointerShouldFail) {
EXPECT_THAT(CreateDocumentStore(/*filesystem=*/nullptr, document_store_dir_,
&fake_clock_, schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
@@ -268,7 +284,7 @@ TEST_F(DocumentStoreTest, CreationWithNullPointerShouldFail) {
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
-TEST_F(DocumentStoreTest, CreationWithBadFilesystemShouldFail) {
+TEST_P(DocumentStoreTest, CreationWithBadFilesystemShouldFail) {
MockFilesystem mock_filesystem;
ON_CALL(mock_filesystem, OpenForWrite(_)).WillByDefault(Return(false));
@@ -277,7 +293,7 @@ TEST_F(DocumentStoreTest, CreationWithBadFilesystemShouldFail) {
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
-TEST_F(DocumentStoreTest, PutAndGetInSameNamespaceOk) {
+TEST_P(DocumentStoreTest, PutAndGetInSameNamespaceOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -297,7 +313,7 @@ TEST_F(DocumentStoreTest, PutAndGetInSameNamespaceOk) {
IsOkAndHolds(EqualsProto(test_document2_)));
}
-TEST_F(DocumentStoreTest, PutAndGetAcrossNamespacesOk) {
+TEST_P(DocumentStoreTest, PutAndGetAcrossNamespacesOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -330,7 +346,7 @@ TEST_F(DocumentStoreTest, PutAndGetAcrossNamespacesOk) {
// Validates that putting an document with the same key will overwrite previous
// document and old doc ids are not getting reused.
-TEST_F(DocumentStoreTest, PutSameKey) {
+TEST_P(DocumentStoreTest, PutSameKey) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -359,7 +375,7 @@ TEST_F(DocumentStoreTest, PutSameKey) {
EXPECT_THAT(doc_store->Put(document3), IsOkAndHolds(Not(document_id1)));
}
-TEST_F(DocumentStoreTest, IsDocumentExistingWithoutStatus) {
+TEST_P(DocumentStoreTest, IsDocumentExistingWithoutStatus) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -395,7 +411,7 @@ TEST_F(DocumentStoreTest, IsDocumentExistingWithoutStatus) {
fake_clock_.GetSystemTimeMilliseconds()));
}
-TEST_F(DocumentStoreTest, GetDeletedDocumentNotFound) {
+TEST_P(DocumentStoreTest, GetDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -416,7 +432,7 @@ TEST_F(DocumentStoreTest, GetDeletedDocumentNotFound) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, GetExpiredDocumentNotFound) {
+TEST_P(DocumentStoreTest, GetExpiredDocumentNotFound) {
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "uri")
.SetSchema("email")
@@ -451,7 +467,7 @@ TEST_F(DocumentStoreTest, GetExpiredDocumentNotFound) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, GetInvalidDocumentId) {
+TEST_P(DocumentStoreTest, GetInvalidDocumentId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -478,7 +494,7 @@ TEST_F(DocumentStoreTest, GetInvalidDocumentId) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
+TEST_P(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -504,7 +520,7 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) {
+TEST_P(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -533,7 +549,7 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentPrintableErrorMessage) {
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
+TEST_P(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -555,7 +571,7 @@ TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteByNamespaceOk) {
+TEST_P(DocumentStoreTest, DeleteByNamespaceOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -599,7 +615,7 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceOk) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
+TEST_P(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -624,7 +640,7 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, DeleteByNamespaceNoExistingDocumentsNotFound) {
+TEST_P(DocumentStoreTest, DeleteByNamespaceNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -645,7 +661,7 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceNoExistingDocumentsNotFound) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
+TEST_P(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
DocumentProto document1 = test_document1_;
document1.set_namespace_("namespace.1");
document1.set_uri("uri1");
@@ -715,7 +731,7 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
+TEST_P(DocumentStoreTest, DeleteBySchemaTypeOk) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
@@ -802,7 +818,7 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
IsOkAndHolds(EqualsProto(person_document)));
}
-TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
+TEST_P(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -828,7 +844,7 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, DeleteBySchemaTypeNoExistingDocumentsNotFound) {
+TEST_P(DocumentStoreTest, DeleteBySchemaTypeNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -846,7 +862,7 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeNoExistingDocumentsNotFound) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
+TEST_P(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
@@ -926,7 +942,7 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_F(DocumentStoreTest, PutDeleteThenPut) {
+TEST_P(DocumentStoreTest, PutDeleteThenPut) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -940,7 +956,7 @@ TEST_F(DocumentStoreTest, PutDeleteThenPut) {
ICING_EXPECT_OK(doc_store->Put(test_document1_));
}
-TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
+TEST_P(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
@@ -1034,7 +1050,7 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_F(DocumentStoreTest, OptimizeInto) {
+TEST_P(DocumentStoreTest, OptimizeInto) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1078,8 +1094,7 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
// Optimizing into the same directory is not allowed
EXPECT_THAT(
- doc_store->OptimizeInto(document_store_dir_, lang_segmenter_.get(),
- /*namespace_id_fingerprint=*/false),
+ doc_store->OptimizeInto(document_store_dir_, lang_segmenter_.get()),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
HasSubstr("directory is the same")));
@@ -1091,8 +1106,7 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
// deleted
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
- /*namespace_id_fingerprint=*/false),
+ EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
IsOkAndHolds(ElementsAre(0, 1, 2)));
int64_t optimized_size1 =
filesystem_.GetFileSize(optimized_document_log.c_str());
@@ -1105,8 +1119,7 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
ICING_ASSERT_OK(doc_store->Delete("namespace", "uri1",
fake_clock_.GetSystemTimeMilliseconds()));
// DocumentId 0 is removed.
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
- /*namespace_id_fingerprint=*/false),
+ EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
IsOkAndHolds(ElementsAre(kInvalidDocumentId, 0, 1)));
int64_t optimized_size2 =
filesystem_.GetFileSize(optimized_document_log.c_str());
@@ -1122,8 +1135,7 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
// DocumentId 0 is removed, and DocumentId 2 is expired.
EXPECT_THAT(
- doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
- /*namespace_id_fingerprint=*/false),
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
IsOkAndHolds(ElementsAre(kInvalidDocumentId, 0, kInvalidDocumentId)));
int64_t optimized_size3 =
filesystem_.GetFileSize(optimized_document_log.c_str());
@@ -1135,8 +1147,7 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
ICING_ASSERT_OK(doc_store->Delete("namespace", "uri2",
fake_clock_.GetSystemTimeMilliseconds()));
// DocumentId 0 and 1 is removed, and DocumentId 2 is expired.
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
- /*namespace_id_fingerprint=*/false),
+ EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
IsOkAndHolds(ElementsAre(kInvalidDocumentId, kInvalidDocumentId,
kInvalidDocumentId)));
int64_t optimized_size4 =
@@ -1144,7 +1155,7 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
EXPECT_THAT(optimized_size3, Gt(optimized_size4));
}
-TEST_F(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) {
+TEST_P(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1154,12 +1165,11 @@ TEST_F(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) {
std::string optimized_dir = document_store_dir_ + "_optimize";
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
- /*namespace_id_fingerprint=*/false),
+ EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
IsOkAndHolds(IsEmpty()));
}
-TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
+TEST_P(DocumentStoreTest, ShouldRecoverFromDataLoss) {
DocumentId document_id1, document_id2;
{
// Can put and delete fine.
@@ -1250,7 +1260,7 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
/*num_docs=*/1, /*sum_length_in_tokens=*/4)));
}
-TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
+TEST_P(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
DocumentId document_id1, document_id2;
{
// Can put and delete fine.
@@ -1361,7 +1371,7 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, ShouldRecoverFromDiscardDerivedFiles) {
+TEST_P(DocumentStoreTest, ShouldRecoverFromDiscardDerivedFiles) {
DocumentId document_id1, document_id2;
{
// Can put and delete fine.
@@ -1459,7 +1469,7 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDiscardDerivedFiles) {
EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
+TEST_P(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
DocumentId document_id1, document_id2;
{
// Can put and delete fine.
@@ -1537,7 +1547,7 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
/*num_docs=*/1, /*sum_length_in_tokens=*/4)));
}
-TEST_F(DocumentStoreTest, GetStorageInfo) {
+TEST_P(DocumentStoreTest, GetStorageInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1580,7 +1590,7 @@ TEST_F(DocumentStoreTest, GetStorageInfo) {
EXPECT_THAT(doc_store_storage_info.document_store_size(), Eq(-1));
}
-TEST_F(DocumentStoreTest, MaxDocumentId) {
+TEST_P(DocumentStoreTest, MaxDocumentId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1605,7 +1615,7 @@ TEST_F(DocumentStoreTest, MaxDocumentId) {
EXPECT_THAT(doc_store->last_added_document_id(), Eq(document_id2));
}
-TEST_F(DocumentStoreTest, GetNamespaceId) {
+TEST_P(DocumentStoreTest, GetNamespaceId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1638,7 +1648,7 @@ TEST_F(DocumentStoreTest, GetNamespaceId) {
EXPECT_THAT(doc_store->GetNamespaceId("namespace1"), IsOkAndHolds(Eq(0)));
}
-TEST_F(DocumentStoreTest, GetDuplicateNamespaceId) {
+TEST_P(DocumentStoreTest, GetDuplicateNamespaceId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1658,7 +1668,7 @@ TEST_F(DocumentStoreTest, GetDuplicateNamespaceId) {
EXPECT_THAT(doc_store->GetNamespaceId("namespace"), IsOkAndHolds(Eq(0)));
}
-TEST_F(DocumentStoreTest, NonexistentNamespaceNotFound) {
+TEST_P(DocumentStoreTest, NonexistentNamespaceNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1670,7 +1680,7 @@ TEST_F(DocumentStoreTest, NonexistentNamespaceNotFound) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, GetCorpusDuplicateCorpusId) {
+TEST_P(DocumentStoreTest, GetCorpusDuplicateCorpusId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1691,7 +1701,7 @@ TEST_F(DocumentStoreTest, GetCorpusDuplicateCorpusId) {
IsOkAndHolds(Eq(0)));
}
-TEST_F(DocumentStoreTest, GetCorpusId) {
+TEST_P(DocumentStoreTest, GetCorpusId) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1726,7 +1736,7 @@ TEST_F(DocumentStoreTest, GetCorpusId) {
EXPECT_THAT(doc_store->GetNamespaceId("namespace1"), IsOkAndHolds(Eq(0)));
}
-TEST_F(DocumentStoreTest, NonexistentCorpusNotFound) {
+TEST_P(DocumentStoreTest, NonexistentCorpusNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1750,7 +1760,7 @@ TEST_F(DocumentStoreTest, NonexistentCorpusNotFound) {
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
-TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreDataSameCorpus) {
+TEST_P(DocumentStoreTest, GetCorpusAssociatedScoreDataSameCorpus) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1775,7 +1785,7 @@ TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreDataSameCorpus) {
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
-TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreData) {
+TEST_P(DocumentStoreTest, GetCorpusAssociatedScoreData) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1813,7 +1823,7 @@ TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreData) {
/*num_docs=*/1, /*sum_length_in_tokens=*/5)));
}
-TEST_F(DocumentStoreTest, NonexistentCorpusAssociatedScoreDataOutOfRange) {
+TEST_P(DocumentStoreTest, NonexistentCorpusAssociatedScoreDataOutOfRange) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1825,7 +1835,7 @@ TEST_F(DocumentStoreTest, NonexistentCorpusAssociatedScoreDataOutOfRange) {
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
-TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataSameCorpus) {
+TEST_P(DocumentStoreTest, GetDocumentAssociatedScoreDataSameCorpus) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1869,7 +1879,7 @@ TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataSameCorpus) {
/*length_in_tokens=*/7)));
}
-TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataDifferentCorpus) {
+TEST_P(DocumentStoreTest, GetDocumentAssociatedScoreDataDifferentCorpus) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1913,7 +1923,7 @@ TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataDifferentCorpus) {
/*length_in_tokens=*/7)));
}
-TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataNotFound) {
+TEST_P(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1925,7 +1935,7 @@ TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataNotFound) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, NonexistentDocumentFilterDataNotFound) {
+TEST_P(DocumentStoreTest, NonexistentDocumentFilterDataNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1937,7 +1947,7 @@ TEST_F(DocumentStoreTest, NonexistentDocumentFilterDataNotFound) {
/*document_id=*/0, fake_clock_.GetSystemTimeMilliseconds()));
}
-TEST_F(DocumentStoreTest, DeleteClearsFilterCache) {
+TEST_P(DocumentStoreTest, DeleteClearsFilterCache) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1964,7 +1974,7 @@ TEST_F(DocumentStoreTest, DeleteClearsFilterCache) {
document_id, fake_clock_.GetSystemTimeMilliseconds()));
}
-TEST_F(DocumentStoreTest, DeleteClearsScoreCache) {
+TEST_P(DocumentStoreTest, DeleteClearsScoreCache) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1993,7 +2003,7 @@ TEST_F(DocumentStoreTest, DeleteClearsScoreCache) {
/*length_in_tokens=*/0)));
}
-TEST_F(DocumentStoreTest, DeleteShouldPreventUsageScores) {
+TEST_P(DocumentStoreTest, DeleteShouldPreventUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2032,7 +2042,7 @@ TEST_F(DocumentStoreTest, DeleteShouldPreventUsageScores) {
document_id, fake_clock_.GetSystemTimeMilliseconds()));
}
-TEST_F(DocumentStoreTest, ExpirationShouldPreventUsageScores) {
+TEST_P(DocumentStoreTest, ExpirationShouldPreventUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2082,7 +2092,7 @@ TEST_F(DocumentStoreTest, ExpirationShouldPreventUsageScores) {
document_id, fake_clock_.GetSystemTimeMilliseconds()));
}
-TEST_F(DocumentStoreTest,
+TEST_P(DocumentStoreTest,
ExpirationTimestampIsSumOfNonZeroTtlAndCreationTimestamp) {
DocumentProto document = DocumentBuilder()
.SetKey("namespace1", "1")
@@ -2109,7 +2119,7 @@ TEST_F(DocumentStoreTest,
/*expiration_timestamp_ms=*/1100)));
}
-TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
+TEST_P(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
DocumentProto document = DocumentBuilder()
.SetKey("namespace1", "1")
.SetSchema("email")
@@ -2139,7 +2149,7 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
/*expiration_timestamp_ms=*/std::numeric_limits<int64_t>::max())));
}
-TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
+TEST_P(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
DocumentProto document =
DocumentBuilder()
.SetKey("namespace1", "1")
@@ -2170,7 +2180,7 @@ TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
/*expiration_timestamp_ms=*/std::numeric_limits<int64_t>::max())));
}
-TEST_F(DocumentStoreTest, CreationTimestampShouldBePopulated) {
+TEST_P(DocumentStoreTest, CreationTimestampShouldBePopulated) {
// Creates a document without a given creation timestamp
DocumentProto document_without_creation_timestamp =
DocumentBuilder()
@@ -2201,7 +2211,7 @@ TEST_F(DocumentStoreTest, CreationTimestampShouldBePopulated) {
Eq(fake_real_time));
}
-TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
+TEST_P(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
DocumentProto document1 = DocumentBuilder()
.SetKey("icing", "email/1")
.SetSchema("email")
@@ -2240,7 +2250,7 @@ TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
/*length_in_tokens=*/0)));
}
-TEST_F(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
+TEST_P(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2255,7 +2265,7 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum));
}
-TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
+TEST_P(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2276,7 +2286,7 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum));
}
-TEST_F(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) {
+TEST_P(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2292,7 +2302,7 @@ TEST_F(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) {
IsOkAndHolds(Not(Eq(checksum))));
}
-TEST_F(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) {
+TEST_P(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2310,7 +2320,7 @@ TEST_F(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) {
EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
}
-TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
+TEST_P(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
const std::string schema_store_dir = schema_store_dir_ + "_custom";
DocumentId email_document_id;
@@ -2445,7 +2455,7 @@ TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
Eq(message_expiration_timestamp));
}
-TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
+TEST_P(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
@@ -2541,7 +2551,7 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
EXPECT_THAT(message_data.schema_type_id(), Eq(new_message_schema_type_id));
}
-TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
+TEST_P(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
@@ -2617,7 +2627,7 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
IsOkAndHolds(EqualsProto(email_with_subject)));
}
-TEST_F(DocumentStoreTest,
+TEST_P(DocumentStoreTest,
UpdateSchemaStoreDeletesDocumentsByDeletedSchemaType) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
@@ -2691,7 +2701,7 @@ TEST_F(DocumentStoreTest,
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
+TEST_P(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
@@ -2790,7 +2800,7 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
EXPECT_THAT(message_data.schema_type_id(), Eq(new_message_schema_type_id));
}
-TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
+TEST_P(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
@@ -2869,7 +2879,7 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
IsOkAndHolds(EqualsProto(email_with_subject)));
}
-TEST_F(DocumentStoreTest,
+TEST_P(DocumentStoreTest,
OptimizedUpdateSchemaStoreDeletesDocumentsByDeletedSchemaType) {
const std::string schema_store_dir = test_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
@@ -2945,7 +2955,7 @@ TEST_F(DocumentStoreTest,
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_F(DocumentStoreTest, GetOptimizeInfo) {
+TEST_P(DocumentStoreTest, GetOptimizeInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2983,8 +2993,7 @@ TEST_F(DocumentStoreTest, GetOptimizeInfo) {
EXPECT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
EXPECT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
ICING_ASSERT_OK(
- document_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
- /*namespace_id_fingerprint=*/false));
+ document_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
document_store.reset();
ICING_ASSERT_OK_AND_ASSIGN(
create_result, CreateDocumentStore(&filesystem_, optimized_dir,
@@ -2999,7 +3008,7 @@ TEST_F(DocumentStoreTest, GetOptimizeInfo) {
EXPECT_THAT(optimize_info.estimated_optimizable_bytes, Eq(0));
}
-TEST_F(DocumentStoreTest, GetAllNamespaces) {
+TEST_P(DocumentStoreTest, GetAllNamespaces) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -3071,7 +3080,7 @@ TEST_F(DocumentStoreTest, GetAllNamespaces) {
UnorderedElementsAre("namespace1"));
}
-TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
+TEST_P(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -3163,7 +3172,7 @@ TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) {
+TEST_P(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -3213,7 +3222,7 @@ TEST_F(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) {
EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) {
+TEST_P(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) {
UsageStore::UsageScores expected_scores;
DocumentId document_id;
{
@@ -3258,7 +3267,7 @@ TEST_F(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) {
EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
+TEST_P(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
UsageStore::UsageScores expected_scores;
DocumentId document_id;
{
@@ -3314,7 +3323,7 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) {
+TEST_P(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -3355,7 +3364,7 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) {
EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
+TEST_P(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -3390,8 +3399,7 @@ TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
std::string optimized_dir = document_store_dir_ + "/optimize_test";
filesystem_.CreateDirectoryRecursively(optimized_dir.c_str());
ICING_ASSERT_OK(
- document_store->OptimizeInto(optimized_dir, lang_segmenter_.get(),
- /*namespace_id_fingerprint=*/false));
+ document_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
// Get optimized document store
ICING_ASSERT_OK_AND_ASSIGN(
@@ -3409,7 +3417,7 @@ TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
EXPECT_THAT(actual_scores, Eq(expected_scores));
}
-TEST_F(DocumentStoreTest, DetectPartialDataLoss) {
+TEST_P(DocumentStoreTest, DetectPartialDataLoss) {
{
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -3450,7 +3458,7 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) {
ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
}
-TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
+TEST_P(DocumentStoreTest, DetectCompleteDataLoss) {
int64_t corruptible_offset;
const std::string document_log_file = absl_ports::StrCat(
document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
@@ -3515,7 +3523,7 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
}
-TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
+TEST_P(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
// The directory testdata/score_cache_without_length_in_tokens/document_store
// contains only the scoring_cache and the document_store_header (holding the
// crc for the scoring_cache). If the current code is compatible with the
@@ -3557,7 +3565,8 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
DocumentStore::Create(
&filesystem_, document_store_dir_, &fake_clock_, schema_store_.get(),
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
&initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
@@ -3568,7 +3577,7 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT);
}
-TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) {
+TEST_P(DocumentStoreTest, DocumentStoreStorageInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -3678,7 +3687,7 @@ TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) {
Eq(0));
}
-TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
+TEST_P(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
// Start fresh and set the schema with one type.
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
@@ -3768,13 +3777,14 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/true,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- &initialize_stats));
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/true,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3789,7 +3799,7 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
}
}
-TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
+TEST_P(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
// Start fresh and set the schema with one type.
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
@@ -3892,7 +3902,7 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
}
}
-TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
+TEST_P(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
// Start fresh and set the schema with one type.
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
@@ -3987,13 +3997,14 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
CorruptDocStoreHeaderChecksumFile();
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/true,
- /*namespace_id_fingerprint=*/false,
- PortableFileBackedProtoLog<
- DocumentWrapper>::kDeflateCompressionLevel,
- /*initialize_stats=*/nullptr));
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/true,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -4005,7 +4016,7 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
}
}
-TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
+TEST_P(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
// Start fresh and set the schema with one type.
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
@@ -4114,7 +4125,7 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
}
}
-TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
+TEST_P(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
// Set up schema.
SchemaProto schema =
SchemaBuilder()
@@ -4182,7 +4193,8 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
DocumentStore::Create(
&filesystem_, document_store_dir, &fake_clock_, schema_store.get(),
/*force_recovery_and_revalidate_documents=*/false,
- /*namespace_id_fingerprint=*/false,
+ GetParam().pre_mapping_fbv, GetParam().use_persistent_hash_map,
+ GetParam().namespace_id_fingerprint,
PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
&initialize_stats));
std::unique_ptr<DocumentStore> document_store =
@@ -4240,7 +4252,7 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
IsOkAndHolds(EqualsProto(document3)));
}
-TEST_F(DocumentStoreTest, GetDebugInfo) {
+TEST_P(DocumentStoreTest, GetDebugInfo) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder()
@@ -4365,7 +4377,7 @@ TEST_F(DocumentStoreTest, GetDebugInfo) {
EXPECT_THAT(out3.corpus_info(), IsEmpty());
}
-TEST_F(DocumentStoreTest, GetDebugInfoWithoutSchema) {
+TEST_P(DocumentStoreTest, GetDebugInfoWithoutSchema) {
std::string schema_store_dir = schema_store_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
@@ -4389,7 +4401,7 @@ TEST_F(DocumentStoreTest, GetDebugInfoWithoutSchema) {
EXPECT_THAT(out.corpus_info(), IsEmpty());
}
-TEST_F(DocumentStoreTest, GetDebugInfoForEmptyDocumentStore) {
+TEST_P(DocumentStoreTest, GetDebugInfoForEmptyDocumentStore) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -4406,6 +4418,198 @@ TEST_F(DocumentStoreTest, GetDebugInfoForEmptyDocumentStore) {
EXPECT_THAT(out.corpus_info(), IsEmpty());
}
+TEST_P(DocumentStoreTest, SwitchKeyMapperTypeShouldRegenerateDerivedFiles) {
+ std::string dynamic_trie_uri_mapper_dir =
+ document_store_dir_ + "/key_mapper_dir";
+ std::string persistent_hash_map_uri_mapper_dir =
+ document_store_dir_ + "/uri_mapper";
+ DocumentId document_id1;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint,
+ GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(document_id1, doc_store->Put(test_document1_));
+
+ if (GetParam().use_persistent_hash_map) {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsTrue());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsFalse());
+ } else {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsFalse());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsTrue());
+ }
+ }
+
+ // Switch key mapper. We should get I/O error and derived files should be
+ // regenerated.
+ {
+ bool switch_key_mapper_flag = !GetParam().use_persistent_hash_map;
+ InitializeStatsProto initialize_stats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ /*use_persistent_hash_map=*/switch_key_mapper_flag,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
+ EXPECT_THAT(initialize_stats.document_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ EXPECT_THAT(doc_store->GetDocumentId(test_document1_.namespace_(),
+ test_document1_.uri()),
+ IsOkAndHolds(document_id1));
+
+ if (switch_key_mapper_flag) {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsTrue());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsFalse());
+ } else {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsFalse());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsTrue());
+ }
+ }
+}
+
+TEST_P(DocumentStoreTest, SameKeyMapperTypeShouldNotRegenerateDerivedFiles) {
+ std::string dynamic_trie_uri_mapper_dir =
+ document_store_dir_ + "/key_mapper_dir";
+ std::string persistent_hash_map_uri_mapper_dir =
+ document_store_dir_ + "/uri_mapper";
+ DocumentId document_id1;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint,
+ GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(document_id1, doc_store->Put(test_document1_));
+
+ if (GetParam().use_persistent_hash_map) {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsTrue());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsFalse());
+ } else {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsFalse());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsTrue());
+ }
+ }
+
+ // Use the same key mapper type. Derived files should not be regenerated.
+ {
+ InitializeStatsProto initialize_stats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint,
+ GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<
+ DocumentWrapper>::kDeflateCompressionLevel,
+ &initialize_stats));
+ EXPECT_THAT(initialize_stats.document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ EXPECT_THAT(doc_store->GetDocumentId(test_document1_.namespace_(),
+ test_document1_.uri()),
+ IsOkAndHolds(document_id1));
+
+ if (GetParam().use_persistent_hash_map) {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsTrue());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsFalse());
+ } else {
+ EXPECT_THAT(filesystem_.DirectoryExists(
+ persistent_hash_map_uri_mapper_dir.c_str()),
+ IsFalse());
+ EXPECT_THAT(
+ filesystem_.DirectoryExists(dynamic_trie_uri_mapper_dir.c_str()),
+ IsTrue());
+ }
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ DocumentStoreTest, DocumentStoreTest,
+ testing::Values(
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/false,
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/false),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/true,
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/false),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/false,
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/false),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/true,
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/false),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/false,
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/true),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/true,
+ /*pre_mapping_fbv_in=*/false,
+ /*use_persistent_hash_map_in=*/true),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/false,
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/true),
+ DocumentStoreTestParam(/*namespace_id_fingerprint_in=*/true,
+ /*pre_mapping_fbv_in=*/true,
+ /*use_persistent_hash_map_in=*/true)));
+
} // namespace
} // namespace lib
diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto
index d4b1aee..507e745 100644
--- a/proto/icing/proto/initialize.proto
+++ b/proto/icing/proto/initialize.proto
@@ -23,7 +23,7 @@ option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
-// Next tag: 11
+// Next tag: 12
message IcingSearchEngineOptions {
// Directory to persist files for Icing. Required.
// If Icing was previously initialized with this directory, it will reload
@@ -104,6 +104,9 @@ message IcingSearchEngineOptions {
// to dynamic trie key mapper).
optional bool use_persistent_hash_map = 10;
+ // Integer index bucket split threshold.
+ optional int32 integer_index_bucket_split_threshold = 11 [default = 65536];
+
reserved 2;
}
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index 20e7738..9a7bd49 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=544124118)
+set(synced_AOSP_CL_number=546080947)